diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index f1de048ea4..486ee6ccb3 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -29,11 +29,14 @@ jobs: # we don't know what commit the last tag was it's safer to get entire repo so previousStableVersion resolves fetch-depth: 0 - - name: Set up python 2 - uses: actions/setup-python@v2 + - name: Set up python 3 + uses: actions/setup-python@v3 with: - python-version: '2.x' + python-version: '3.x' architecture: 'x64' + cache: 'pip' + cache-dependency-path: '**/requirements.txt' + - run: pip install -r python/requirements.txt - name: Set up JDK ${{ matrix.jdk }} uses: actions/setup-java@v2 @@ -79,4 +82,4 @@ jobs: name: play-${{ github.sha }} if-no-files-found: error path: | - ./framework/dist/* \ No newline at end of file + ./framework/dist/* diff --git a/framework/build.xml b/framework/build.xml index ed3dcbb399..c182b18155 100644 --- a/framework/build.xml +++ b/framework/build.xml @@ -253,7 +253,7 @@ - + diff --git a/framework/pym/play/application.py b/framework/pym/play/application.py index cb79af335f..644d041708 100644 --- a/framework/pym/play/application.py +++ b/framework/pym/play/application.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import input +from builtins import object import sys import os import os.path @@ -24,23 +27,23 @@ def __init__(self, application_path, env, ignoreMissingModules = False): # only parse conf it is exists - if it should be there, it will be caught later # (depends on command) confExists = os.path.exists(os.path.join(self.path, 'conf', 'application.conf')); - if application_path is not None and confExists: + if application_path != None and confExists: confFolder = os.path.join(application_path, 'conf/') try: self.conf = PlayConfParser(confFolder, env) except Exception as err: - print "~ Failed to parse application configuration", err + print("~ Failed to parse application configuration", err) 
self.conf = None # No app / Invalid app else: self.conf = None self.play_env = env - if env.has_key('jpda.port'): + if 'jpda.port' in env: self.jpda_port = env['jpda.port'] else: self.jpda_port = self.readConf('jpda.port') - if env.has_key('jpda.address'): + if 'jpda.address' in env: self.jpda_address = env['jpda.address'] else: self.jpda_address = self.readConf('jpda.address') @@ -54,9 +57,9 @@ def check(self): assert os.path.exists(os.path.join(self.path, 'conf', 'routes')) assert os.path.exists(os.path.join(self.path, 'conf', 'application.conf')) except AssertionError: - print "~ Oops. conf/routes or conf/application.conf missing." - print "~ %s does not seem to host a valid application." % os.path.normpath(self.path) - print "~" + print("~ Oops. conf/routes or conf/application.conf missing.") + print("~ %s does not seem to host a valid application." % os.path.normpath(self.path)) + print("~") sys.exit(-1) def readConf(self, key): @@ -78,20 +81,20 @@ def modules(self): application_mode = "dev" if application_mode == 'dev': #Load docviewer module - modules.append(os.path.normpath(os.path.join(self.play_env["basedir"], 'modules/docviewer'))) + modules.append(os.path.normpath(os.path.join(self.play_env["basedir"], 'modules/docviewer'))) for m in self.readConfs('module.'): if '${play.path}' in m: m = m.replace('${play.path}', self.play_env["basedir"]) - if m[0] is not '/': + if m[0] != '/': m = os.path.normpath(os.path.join(self.path, m)) if not os.path.exists(m) and not self.ignoreMissingModules: - print "~ Oops," - print "~ Module not found: %s" % (m) - print "~" + print("~ Oops,") + print("~ Module not found: %s" % (m)) + print("~") if m.startswith('${play.path}/modules'): - print "~ You can try to install the missing module using 'play install %s'" % (m[21:]) - print "~" + print("~ You can try to install the missing module using 'play install %s'" % (m[21:])) + print("~") sys.exit(-1) modules.append(m) if self.path and os.path.exists(os.path.join(self.path, 
'modules')): @@ -108,7 +111,7 @@ def modules(self): return set(modules) # Ensure we don't have duplicates def module_names(self): - return map(lambda x: x[7:],self.conf.getAllKeys("module.")) + return [x[7:] for x in self.conf.getAllKeys("module.")] def override(self, f, t): fromFile = None @@ -116,18 +119,18 @@ def override(self, f, t): pc = os.path.join(module, f) if os.path.exists(pc): fromFile = pc if not fromFile: - print "~ %s not found in any module" % f - print "~ " + print("~ %s not found in any module" % f) + print("~ ") sys.exit(-1) toFile = os.path.join(self.path, t) if os.path.exists(toFile): - response = raw_input("~ Warning! %s already exists and will be overridden (y/n)? " % toFile) + response = input("~ Warning! %s already exists and will be overridden (y/n)? " % toFile) if not response == 'y': return if not os.path.exists(os.path.dirname(toFile)): os.makedirs(os.path.dirname(toFile)) shutil.copyfile(fromFile, toFile) - print "~ Copied %s to %s " % (fromFile, toFile) + print("~ Copied %s to %s " % (fromFile, toFile)) def name(self): return self.readConf("application.name") @@ -207,15 +210,15 @@ def fw_cp_args(self): return cp_args def pid_path(self): - if self.play_env.has_key('pid_file'): + if 'pid_file' in self.play_env: return os.path.join(self.path, self.play_env['pid_file']) - elif os.environ.has_key('PLAY_PID_PATH'): + elif 'PLAY_PID_PATH' in os.environ: return os.environ['PLAY_PID_PATH'] else: return os.path.join(self.path, 'server.pid') def log_path(self): - if not os.environ.has_key('PLAY_LOG_PATH'): + if 'PLAY_LOG_PATH' not in os.environ: log_path = os.path.join(self.path, 'logs') else: log_path = os.environ['PLAY_LOG_PATH'] @@ -231,12 +234,12 @@ def check_jpda(self): else: s.bind((self.jpda_address, int(self.jpda_port))) s.close() - except socket.error, e: + except socket.error as e: if "disable_random_jpda" in self.play_env and self.play_env["disable_random_jpda"]: - print 'JPDA port %s is already used, and command line option "-f" was 
specified. Cannot start server\n' % self.jpda_port + print('JPDA port %s is already used, and command line option "-f" was specified. Cannot start server\n' % self.jpda_port) sys.exit(-1) else: - print 'JPDA port %s is already used. Will try to use any free port for debugging' % self.jpda_port + print('JPDA port %s is already used. Will try to use any free port for debugging' % self.jpda_port) self.jpda_port = 0 def java_args_memory(self, java_args): @@ -272,29 +275,29 @@ def java_cmd(self, java_args, cp_args=None, className='play.server.Server', args if cp_args is None: cp_args = self.cp_args() - if self.play_env.has_key('jpda.port'): + if 'jpda.port' in self.play_env: self.jpda_port = self.play_env['jpda.port'] - if self.play_env.has_key('jpda.address'): + if 'jpda.address' in self.play_env: self.jpda_address = self.play_env['jpda.address'] application_mode = self.readConf('application.mode').lower() if not application_mode: - print "~ Warning: no application.mode defined in you conf/application.conf. Using DEV mode." + print("~ Warning: no application.mode defined in you conf/application.conf. 
Using DEV mode.") application_mode = "dev" if application_mode == 'prod': java_args.append('-server') - if self.play_env.has_key('jvm_version'): + if 'jvm_version' in self.play_env: javaVersion = self.play_env['jvm_version'] else: javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5") or javaVersion.startswith("1.6") or javaVersion.startswith("1.7"): - print "~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion + print("~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion) java_args.append('-noverify') @@ -302,13 +305,13 @@ def java_cmd(self, java_args, cp_args=None, className='play.server.Server', args if java_policy != '': policyFile = os.path.join(self.path, 'conf', java_policy) if os.path.exists(policyFile): - print "~ using policy file \"%s\"" % policyFile + print("~ using policy file \"%s\"" % policyFile) java_args.append('-Djava.security.manager') java_args.append('-Djava.security.policy==%s' % policyFile) - if self.play_env.has_key('http.port'): + if 'http.port' in self.play_env: args += ["--http.port=%s" % self.play_env['http.port']] - if self.play_env.has_key('https.port'): + if 'https.port' in self.play_env: args += ["--https.port=%s" % self.play_env['https.port']] java_args.append('-Dfile.encoding=utf-8') @@ -345,7 +348,7 @@ def _absoluteToRelative(path, start): return os.path.curdir return os.path.join(*rel_list) -class PlayConfParser: +class PlayConfParser(object): DEFAULTS = { 'http.port': '9000', @@ -355,15 +358,15 @@ class PlayConfParser: def __init__(self, confFolder, env): self.id = env["id"] self.entries = self.readFile(confFolder, "application.conf") - if env.has_key('jpda.port'): + if 'jpda.port' in env: self.entries['jpda.port'] = env['jpda.port'] - if env.has_key('http.port'): + if 'http.port' in env: 
self.entries['http.port'] = env['http.port'] - if env.has_key('jvm_version'): + if 'jvm_version' in env: self.entries['jvm_version'] = env['jvm_version'] def readFile(self, confFolder, filename): - f = file(confFolder + filename) + f = open(confFolder + filename, 'r') result = dict() for line in f: linedef = line.strip() @@ -382,12 +385,12 @@ def readFile(self, confFolder, filename): washedResult = dict() # first get all keys with correct framework id - for (key, value) in result.items(): + for (key, value) in list(result.items()): if key.startswith('%' + self.id + '.'): stripedKey = key[(len(self.id)+2):] washedResult[stripedKey]=value # now get all without framework id if we don't already have it - for (key, value) in result.items(): + for (key, value) in list(result.items()): if not key.startswith('%'): # check if we already have it if not (key in washedResult): @@ -396,7 +399,7 @@ def readFile(self, confFolder, filename): # find all @include includeFiles = [] - for (key, value) in washedResult.items(): + for (key, value) in list(washedResult.items()): if key.startswith('@include.'): includeFiles.append(value) @@ -407,10 +410,10 @@ def readFile(self, confFolder, filename): fromIncludeFile = self.readFile(confFolder, self._expandValue(includeFile)) # add everything from include file - for (key, value) in fromIncludeFile.items(): + for (key, value) in list(fromIncludeFile.items()): washedResult[key]=value except Exception as err: - print "~ Failed to load included configuration %s: %s" % (includeFile, err) + print("~ Failed to load included configuration %s: %s" % (includeFile, err)) return washedResult @@ -423,7 +426,7 @@ def get(self, key): def getAllKeys(self, query): result = [] - for (key, value) in self.entries.items(): + for (key, value) in list(self.entries.items()): if key.startswith(query): result.append(key) return result diff --git a/framework/pym/play/cmdloader.py b/framework/pym/play/cmdloader.py index fa4406cecd..459f6850fc 100644 --- 
a/framework/pym/play/cmdloader.py +++ b/framework/pym/play/cmdloader.py @@ -1,6 +1,8 @@ +from __future__ import print_function import imp import os import warnings +import traceback def play_formatwarning(msg, *a): # ignore everything except the message @@ -9,7 +11,7 @@ def play_formatwarning(msg, *a): warnings.formatwarning = play_formatwarning -class CommandLoader: +class CommandLoader(object): def __init__(self, play_path): self.path = os.path.join(play_path, 'framework', 'pym', 'play', 'commands') self.commands = {} @@ -23,7 +25,9 @@ def load_core(self): name = filename.replace(".py", "") mod = load_python_module(name, self.path) self._load_cmd_from(mod) - except: + except Exception as e: + print (e) + traceback.print_exc() warnings.warn("!! Warning: could not load core command file " + filename, RuntimeWarning) def load_play_module(self, modname): @@ -33,10 +37,10 @@ def load_play_module(self, modname): leafname = os.path.basename(modname).split('.')[0] mod = imp.load_source(leafname, os.path.join(modname, "commands.py")) self._load_cmd_from(mod) - except Exception, e: - print '~' - print '~ !! Error while loading %s: %s' % (commands, e) - print '~' + except Exception as e: + print('~') + print('~ !! 
Error while loading %s: %s' % (commands, e)) + print('~') pass # No command to load in this module def _load_cmd_from(self, mod): @@ -57,6 +61,6 @@ def load_python_module(name, location): try: return imp.load_module(name, mod_desc[0], mod_desc[1], mod_desc[2]) finally: - if mod_file is not None and not mod_file.closed: + if mod_file != None and not mod_file.closed: mod_file.close() diff --git a/framework/pym/play/commands/ant.py b/framework/pym/play/commands/ant.py index 366415701c..5cba882638 100644 --- a/framework/pym/play/commands/ant.py +++ b/framework/pym/play/commands/ant.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import time @@ -20,6 +21,6 @@ def execute(**kargs): shutil.copyfile(os.path.join(play_env["basedir"], 'resources/build.xml'), os.path.join(app.path, 'build.xml')) - print "~ OK, a build.xml file has been created" - print "~ Define the PLAY_PATH env property, and use it with ant run|start|stop" - print "~" + print("~ OK, a build.xml file has been created") + print("~ Define the PLAY_PATH env property, and use it with ant run|start|stop") + print("~") diff --git a/framework/pym/play/commands/autotest.py b/framework/pym/play/commands/autotest.py index 3b082bd8fa..22702c8e36 100644 --- a/framework/pym/play/commands/autotest.py +++ b/framework/pym/play/commands/autotest.py @@ -1,9 +1,10 @@ +from __future__ import print_function # Command related to execution: auto-test import sys import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse import subprocess import webbrowser import time @@ -28,14 +29,14 @@ def execute(**kargs): def autotest(app, args): app.check() - print "~ Running in test mode" - print "~ Ctrl+C to stop" - print "~ " + print("~ Running in test mode") + print("~ Ctrl+C to stop") + print("~ ") - print "~ Deleting %s" % os.path.normpath(os.path.join(app.path, 'tmp')) + print("~ Deleting %s" % 
os.path.normpath(os.path.join(app.path, 'tmp'))) if os.path.exists(os.path.join(app.path, 'tmp')): shutil.rmtree(os.path.join(app.path, 'tmp')) - print "~" + print("~") # Kill if exists http_port = 9000 @@ -46,16 +47,16 @@ def autotest(app, args): else: http_port = app.readConf('http.port') try: - proxy_handler = urllib2.ProxyHandler({}) - opener = urllib2.build_opener(proxy_handler) + proxy_handler = urllib.request.ProxyHandler({}) + opener = urllib.request.build_opener(proxy_handler) opener.open('http://localhost:%s/@kill' % http_port) - except Exception, e: + except Exception as e: pass # Do not run the app if SSL is configured and no cert store is configured keystore = app.readConf('keystore.file') if protocol == 'https' and not keystore: - print "https without keystore configured. play auto-test will fail. Exiting now." + print("https without keystore configured. play auto-test will fail. Exiting now.") sys.exit(-1) # read parameters @@ -94,25 +95,25 @@ def autotest(app, args): try: play_process = subprocess.Popen(java_cmd, env=os.environ, stdout=sout) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) soutint = open(os.path.join(app.log_path(), 'system.out'), 'r') while True: if play_process.poll(): - print "~" - print "~ Oops, application has not started?" 
- print "~" + print("~") + print("~ Oops, application has not started?") + print("~") sys.exit(-1) line = soutint.readline().strip() if line: - print line + print(line) if line.find('Server is up and running') > -1: # This line is written out by Server.java to system.out and is not log file dependent soutint.close() break # Run FirePhoque - print "~" - print "~ Starting FirePhoque..." + print("~") + print("~ Starting FirePhoque...") headless_browser = '' if app.readConf('headlessBrowser'): @@ -134,25 +135,25 @@ def autotest(app, args): try: subprocess.call(java_cmd, env=os.environ) except OSError: - print "Could not execute the headless browser. " + print("Could not execute the headless browser. ") sys.exit(-1) - print "~" + print("~") time.sleep(1) # Kill if exists try: - proxy_handler = urllib2.ProxyHandler({}) - opener = urllib2.build_opener(proxy_handler) + proxy_handler = urllib.request.ProxyHandler({}) + opener = urllib.request.build_opener(proxy_handler) opener.open('%s://localhost:%s/@kill' % (protocol, http_port)) - except Exception, e: + except Exception as e: pass if os.path.exists(os.path.join(app.path, 'test-result/result.passed')): - print "~ All tests passed" - print "~" + print("~ All tests passed") + print("~") testspassed = True if os.path.exists(os.path.join(app.path, 'test-result/result.failed')): - print "~ Some tests have failed. See file://%s for results" % test_result - print "~" + print("~ Some tests have failed. 
See file://%s for results" % test_result) + print("~") sys.exit(1) diff --git a/framework/pym/play/commands/base.py b/framework/pym/play/commands/base.py index d427f3b30d..1763535e1d 100644 --- a/framework/pym/play/commands/base.py +++ b/framework/pym/play/commands/base.py @@ -1,11 +1,14 @@ +from __future__ import print_function # Command related to creation and execution: run, new, clean +from builtins import input +from builtins import str import sys import os import subprocess import shutil import getopt -import urllib2 +import urllib.request, urllib.error, urllib.parse import webbrowser import time import signal @@ -54,14 +57,14 @@ def new(app, args, env, cmdloader=None): withModules = a.split(',') if o in ('--name'): application_name = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ Sorry, unrecognized option" - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ Sorry, unrecognized option") + print("~ ") sys.exit(-1) if os.path.exists(app.path): - print "~ Oops. %s already exists" % app.path - print "~" + print("~ Oops. %s already exists" % app.path) + print("~") sys.exit(-1) md = [] @@ -74,18 +77,18 @@ def new(app, args, env, cmdloader=None): if os.path.isdir(os.path.join(env["basedir"], 'modules/%s' % f)) and f.find('%s-' % m) == 0: dirname = f break - + if not dirname: - print "~ Oops. No module %s found" % m - print "~ Try to install it using 'play install %s'" % m - print "~" + print("~ Oops. No module %s found" % m) + print("~ Try to install it using 'play install %s'" % m) + print("~") sys.exit(-1) md.append(dirname) - print "~ The new application will be created in %s" % os.path.normpath(app.path) + print("~ The new application will be created in %s" % os.path.normpath(app.path)) if application_name is None: - application_name = raw_input("~ What is the application name? [%s] " % os.path.basename(app.path)) + application_name = input("~ What is the application name? 
[%s] " % os.path.basename(app.path)) if application_name == "": application_name = os.path.basename(app.path) copy_directory(os.path.join(env["basedir"], 'resources/application-skel'), app.path) @@ -94,9 +97,9 @@ def new(app, args, env, cmdloader=None): app.check() replaceAll(os.path.join(app.path, 'conf/application.conf'), r'%APPLICATION_NAME%', application_name) replaceAll(os.path.join(app.path, 'conf/application.conf'), r'%SECRET_KEY%', secretKey()) - print "~" + print("~") - # Configure modules + # Configure modules for m in md: # Check dependencies.yml of the module depsYaml = os.path.join(env["basedir"], 'modules/%s/conf/dependencies.yml' % m) @@ -107,13 +110,13 @@ def new(app, args, env, cmdloader=None): replaceAll(os.path.join(app.path, 'conf/dependencies.yml'), r'- play\n', '- play\n - %s\n' % moduleDefinition ) except Exception: pass - + cmdloader.commands['dependencies'].execute(command='dependencies', app=app, args=['--sync'], env=env, cmdloader=cmdloader) - print "~ OK, the application is created." - print "~ Start it with : play run %s" % sys.argv[2] - print "~ Have fun!" 
- print "~" + print("~ OK, the application is created.") + print("~ Start it with : play run %s" % sys.argv[2]) + print("~ Have fun!") + print("~") process = None @@ -131,19 +134,19 @@ def handle_sigint(signum, frame): if 'process' in globals(): if first_sigint: # Prefix with new line because ^C usually appears on the terminal - print "\nTerminating Java process" + print("\nTerminating Java process") process.terminate() first_sigint = False else: - print "\nKilling Java process" + print("\nKilling Java process") process.kill() - + def run(app, args): global process app.check() - - print "~ Ctrl+C to stop" - print "~ " + + print("~ Ctrl+C to stop") + print("~ ") java_cmd = app.java_cmd(args) try: process = subprocess.Popen (java_cmd, env=os.environ) @@ -153,46 +156,46 @@ def run(app, args): if 0 != return_code: sys.exit(return_code) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") sys.exit(-1) - print + print("~") def clean(app): app.check() tmp = app.readConf('play.tmp') if tmp is None or not tmp.strip(): tmp = 'tmp' - print "~ Deleting %s" % os.path.normpath(os.path.join(app.path, tmp)) + print("~ Deleting %s" % os.path.normpath(os.path.join(app.path, tmp))) if os.path.exists(os.path.join(app.path, tmp)): shutil.rmtree(os.path.join(app.path, tmp)) - print "~" + print("~") def show_modules(app, args): app.check() modules = app.modules() if len(modules): - print "~ Application modules are:" - print "~ " + print("~ Application modules are:") + print("~ ") for module in modules: - print "~ %s" % module + print("~ %s" % module) else: - print "~ No modules installed in this application" - print "~ " + print("~ No modules installed in this application") + print("~") sys.exit(0) def id(play_env): if not play_env["id"]: - print "~ framework ID is not set" - new_id = raw_input("~ What is the new framework ID (or blank to unset)? ") + print("~ framework ID is not set") + new_id = input("~ What is the new framework ID (or blank to unset)? 
") if new_id: - print "~" - print "~ OK, the framework ID is now %s" % new_id - print "~" + print("~") + print("~ OK, the framework ID is now %s" % new_id) + print("~") open(play_env["id_file"], 'w').write(new_id) else: - print "~" - print "~ OK, the framework ID is unset" - print "~" + print("~") + print("~ OK, the framework ID is unset") + print("~") if os.path.exists(play_env["id_file"]): os.remove(play_env["id_file"]) @@ -203,13 +206,13 @@ def kill(pid): import ctypes handle = ctypes.windll.kernel32.OpenProcess(1, False, int(pid)) if not ctypes.windll.kernel32.TerminateProcess(handle, 0): - print "~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError()) - print "~ " + print("~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError())) + print("~ ") sys.exit(-1) else: try: os.kill(int(pid), 15) except OSError: - print "~ Play was not running (Process id %s not found)" % pid - print "~" + print("~ Play was not running (Process id %s not found)" % pid) + print("~") sys.exit(-1) diff --git a/framework/pym/play/commands/check.py b/framework/pym/play/commands/check.py index 5ec55595a2..e176cd2379 100644 --- a/framework/pym/play/commands/check.py +++ b/framework/pym/play/commands/check.py @@ -1,6 +1,10 @@ +from __future__ import print_function +from builtins import str +from builtins import range +from builtins import object import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse import simplejson as json from play.utils import * @@ -27,23 +31,23 @@ def execute(**kargs): releases = allreleases() if len(releases) == 0: - print "~ No release found." + print("~ No release found.") elif current == max(releases): - print "~ You are using the latest version." 
+ print("~ You are using the latest version.") else: - print "~ \tLatest release: " + str(max(releases)) - print "~ \tYour version : " + str(current) - print "~" - print "~ Latest release download: " + max(releases).url() + print("~ \tLatest release: " + str(max(releases))) + print("~ \tYour version : " + str(current)) + print("~") + print("~ Latest release download: " + max(releases).url()) - print "~" + print("~") def allreleases(): try: - req = urllib2.Request(TAGS_URL) + req = urllib.request.Request(TAGS_URL) req.add_header('Accept', 'application/json') - opener = urllib2.build_opener() + opener = urllib.request.build_opener() result = opener.open(req) jsonObject = json.loads(result.read()) releases = [] @@ -51,18 +55,18 @@ def allreleases(): releases.append(Release(tagObj["name"])) return releases - except urllib2.HTTPError, e: - print "~ Oops," - print "~ Cannot contact github..." - print "~" + except urllib.error.HTTPError as e: + print("~ Oops,") + print("~ Cannot contact github...") + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Oops," - print "~ Cannot contact github..." 
- print "~" + except urllib.error.URLError as e: + print("~ Oops,") + print("~ Cannot contact github...") + print("~") sys.exit(-1) -class Release: +class Release(object): # TODO: Be smarter at analysing the rest (ex: RC1 vs RC2) def __init__(self, strversion): @@ -73,7 +77,7 @@ def __init__(self, strversion): self.numpart = '' self.rest = strversion.replace(self.numpart, "") try: - self.versions = map(lambda x: int(x), self.numpart.split(".")) + self.versions = [int(x) for x in self.numpart.split(".")] except: self.versions = [0,0] if not self.rest: self.rest = "Z" diff --git a/framework/pym/play/commands/classpath.py b/framework/pym/play/commands/classpath.py index a887c9a4de..b249fe2dd8 100644 --- a/framework/pym/play/commands/classpath.py +++ b/framework/pym/play/commands/classpath.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Show the computed classpath for the application COMMANDS = ['cp', 'classpath'] @@ -10,7 +11,7 @@ def execute(**kargs): command = kargs.get("command") app = kargs.get("app") args = kargs.get("args") - print "~ Computed classpath is:" - print "~ " - print app.getClasspath() - print "~ " + print("~ Computed classpath is:") + print("~ ") + print(app.getClasspath()) + print("~ ") diff --git a/framework/pym/play/commands/daemon.py b/framework/pym/play/commands/daemon.py index 9f66612af3..f6b2512790 100644 --- a/framework/pym/play/commands/daemon.py +++ b/framework/pym/play/commands/daemon.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range import errno import os import os.path @@ -44,12 +47,12 @@ def start(app, args): if os.path.exists(app.pid_path()): pid = open(app.pid_path()).readline().strip() if process_running(pid): - print "~ Oops. %s is already started (pid:%s)! (or delete %s)" % ( - os.path.normpath(app.path), pid, os.path.normpath(app.pid_path())) - print "~" + print("~ Oops. %s is already started (pid:%s)! 
(or delete %s)" % ( + os.path.normpath(app.path), pid, os.path.normpath(app.pid_path()))) + print("~") sys.exit(1) else: - print "~ removing pid file %s for not running pid %s" % (os.path.normpath(app.pid_path()), pid) + print("~ removing pid file %s for not running pid %s" % (os.path.normpath(app.pid_path()), pid)) os.remove(app.pid_path()) sysout = app.readConf('application.log.system.out') @@ -61,35 +64,35 @@ def start(app, args): try: pid = subprocess.Popen(app.java_cmd(args), stdout=sout, env=os.environ).pid except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print "~ OK, %s is started" % os.path.normpath(app.path) + print("~ OK, %s is started" % os.path.normpath(app.path)) if sysout: - print "~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) + print("~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) pid_file = open(app.pid_path(), 'w') pid_file.write(str(pid)) - print "~ pid is %s" % pid - print "~" + print("~ pid is %s" % pid) + print("~") def stop(app): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") sys.exit(-1) pid = open(app.pid_path()).readline().strip() kill(pid) os.remove(app.pid_path()) - print "~ OK, %s is stopped" % app.path - print "~" + print("~ OK, %s is stopped" % app.path) + print("~") def restart(app, args): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! 
%s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") else: pid = open(app.pid_path()).readline().strip() os.remove(app.pid_path()) @@ -105,34 +108,34 @@ def restart(app, args): try: pid = subprocess.Popen(java_cmd, stdout=sout, env=os.environ).pid except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print "~ OK, %s is restarted" % os.path.normpath(app.path) + print("~ OK, %s is restarted" % os.path.normpath(app.path)) if sysout: - print "~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) + print("~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) pid_file = open(app.pid_path(), 'w') pid_file.write(str(pid)) - print "~ New pid is %s" % pid - print "~" + print("~ New pid is %s" % pid) + print("~") sys.exit(0) def pid(app): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") sys.exit(-1) pid = open(app.pid_path()).readline().strip() - print "~ PID of the running applications is %s" % pid - print "~ " + print("~ PID of the running applications is %s" % pid) + print("~ ") def out(app): app.check() if not os.path.exists(os.path.join(app.log_path(), 'system.out')): - print "~ Oops! %s not found" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) - print "~" + print("~ Oops! 
%s not found" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) + print("~") sys.exit(-1) sout = open(os.path.join(app.log_path(), 'system.out'), 'r') try: @@ -146,7 +149,7 @@ def out(app): time.sleep(1) sout.seek(where) else: - print line + print(line) def kill(pid): @@ -156,16 +159,16 @@ def kill(pid): process = ctypes.windll.kernel32.TerminateProcess(handle, 0) ctypes.windll.kernel32.CloseHandle(handle) if not process: - print "~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError()) - print "~ " + print("~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError())) + print("~ ") sys.exit(-1) - print "~ Process with PID %s terminated" % pid + print("~ Process with PID %s terminated" % pid) else: try: _terminate_unix_process_if_exists(int(pid)) except OSError: - print "~ Play was not running (Process id %s not found)" % pid - print "~" + print("~ Play was not running (Process id %s not found)" % pid) + print("~") sys.exit(-1) @@ -211,8 +214,8 @@ def process_list_nt(): else: proc_dict[instance] = 0 idProcessLocalizedName = win32pdhutil.find_pdh_counter_localized_name("ID Process") - for instance, max_instances in proc_dict.items(): - for inum in xrange(max_instances + 1): + for instance, max_instances in list(proc_dict.items()): + for inum in range(max_instances + 1): hq = win32pdh.OpenQuery() # initializes the query handle path = win32pdh.MakeCounterPath((None, processLocalizedName, instance, None, inum, idProcessLocalizedName)) counter_handle = win32pdh.AddCounter(hq, path) diff --git a/framework/pym/play/commands/deps.py b/framework/pym/play/commands/deps.py index d82da2b62f..2d2b91d046 100644 --- a/framework/pym/play/commands/deps.py +++ b/framework/pym/play/commands/deps.py @@ -1,6 +1,7 @@ +from __future__ import print_function import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, 
urllib.parse import subprocess import simplejson as json @@ -57,14 +58,14 @@ def execute(**kargs): add_options.append('-Dclearcache') if args.count('--jpda'): args.remove('--jpda') - print "~ Waiting for JPDA client to continue" + print("~ Waiting for JPDA client to continue") add_options.append('-Xdebug') add_options.append('-Xrunjdwp:transport=dt_socket,address=%s,server=y,suspend=y' % app.jpda_port) for arg in args: if arg.startswith("-D"): add_options.append(arg) elif not arg.startswith('-Xm'): - print "~ WARNING: " + arg + " argument will be skipped" + print("~ WARNING: " + arg + " argument will be skipped") java_cmd = [java_path()] + add_options + args_memory + ['-classpath', app.fw_cp_args(), 'play.deps.DependenciesManager'] try: @@ -72,5 +73,5 @@ def execute(**kargs): if 0 != return_code: sys.exit(return_code); except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") sys.exit(-1) diff --git a/framework/pym/play/commands/eclipse.py b/framework/pym/play/commands/eclipse.py index 93211c16f2..01a6eb1b82 100644 --- a/framework/pym/play/commands/eclipse.py +++ b/framework/pym/play/commands/eclipse.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import str import os, os.path import shutil import time @@ -30,9 +32,9 @@ def execute(**kargs): javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5") or javaVersion.startswith("1.6") or javaVersion.startswith("1.7"): - print "~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion + print("~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion) vm_arguments = vm_arguments +' -noverify' @@ -77,7 +79,7 @@ def execute(**kargs): # pointers to source jars produced by 'play deps' src_file = os.path.join(lib_src, os.path.basename(el) + '.src') if os.path.exists(src_file): - f = file(src_file) + f = open(src_file, 'r') cpJarToSource[el] = f.readline().rstrip() f.close() @@ -93,10 +95,10 @@ def execute(**kargs): if el == playJarPath: cpXML += '\n\t' % (os.path.normpath(el) , playSourcePath) else: - if cpJarToSource.has_key(el): + if el in cpJarToSource: cpXML += '\n\t' % (os.path.normpath(el), cpJarToSource[el]) else: - if javadocLocation.has_key(el): + if el in javadocLocation: cpXML += '\n\t\t' % os.path.normpath(el) cpXML += '\n\t\t\t' f = file(javadocLocation[el]) @@ -156,11 +158,11 @@ def execute(**kargs): os.rename(os.path.join(app.path, 'eclipse/debug.launch'), os.path.join(app.path, 'eclipse/%s.launch' % application_name)) if is_application: - print "~ OK, the application \"%s\" is ready for eclipse" % application_name + print("~ OK, the application \"%s\" is ready for eclipse" % application_name) else: - print "~ OK, the module \"%s\" is 
ready for eclipse" % application_name - print "~ Use File/Import/General/Existing project to import %s into eclipse" % os.path.normpath(app.path) - print "~" - print "~ Use eclipsify again when you want to update eclipse configuration files." - print "~ However, it's often better to delete and re-import the project into your workspace since eclipse keeps dirty caches..." - print "~" + print("~ OK, the module \"%s\" is ready for eclipse" % application_name) + print("~ Use File/Import/General/Existing project to import %s into eclipse" % os.path.normpath(app.path)) + print("~") + print("~ Use eclipsify again when you want to update eclipse configuration files.") + print("~ However, it's often better to delete and re-import the project into your workspace since eclipse keeps dirty caches...") + print("~") diff --git a/framework/pym/play/commands/evolutions.py b/framework/pym/play/commands/evolutions.py index acf5eb26bc..27449296ab 100644 --- a/framework/pym/play/commands/evolutions.py +++ b/framework/pym/play/commands/evolutions.py @@ -1,6 +1,7 @@ +from __future__ import print_function import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse import subprocess import simplejson as json @@ -36,7 +37,7 @@ def execute(**kargs): add_options = ['-Dapplication.path=%s' % (app.path), '-Dframework.path=%s' % (play_env['basedir']), '-Dplay.id=%s' % play_env['id'], '-Dplay.version=%s' % play_env['version']] if args.count('--jpda'): - print "~ Waiting for JPDA client to continue" + print("~ Waiting for JPDA client to continue") args.remove('--jpda') add_options.append('-Xdebug') add_options.append('-Xrunjdwp:transport=dt_socket,address=%s,server=y,suspend=y' % app.jpda_port) @@ -52,5 +53,5 @@ def execute(**kargs): if 0 != return_code: sys.exit(return_code); except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly 
(the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) diff --git a/framework/pym/play/commands/help.py b/framework/pym/play/commands/help.py index 821428240b..2f4e4efa73 100644 --- a/framework/pym/play/commands/help.py +++ b/framework/pym/play/commands/help.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Display help import sys, os, re @@ -19,7 +20,7 @@ def execute(**kargs): cmd = args[0] help_file = os.path.join(play_env["basedir"], 'documentation', 'commands', 'cmd-%s.txt' % cmd) if os.path.exists(help_file): - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) else: exists = False slugCmd = re.sub('[-\s]+', '-', re.sub('[^\w\s-]', '', cmd.encode('ascii', 'ignore')).strip().lower()) @@ -30,38 +31,38 @@ def execute(**kargs): % slugCmd) exists = os.path.exists(help_file) if exists: - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) break if not exists: - print '~ Oops, command \'%s\' not found. Try just \'play help\' to list all commands.' % cmd - print '~' + print('~ Oops, command \'%s\' not found. Try just \'play help\' to list all commands.' 
% cmd) + print('~') sys.exit(-1) else: main_help(cmdloader.commands, play_env) def main_help(commands, play_env): modules_commands = [] - print "~ For all commands, if the application is not specified, the current directory is used" - print "~ Use 'play help cmd' to get more help on a specific command" - print "~" - print "~ Core commands:" - print "~ ~~~~~~~~~~~~~~" + print("~ For all commands, if the application is not specified, the current directory is used") + print("~ Use 'play help cmd' to get more help on a specific command") + print("~") + print("~ Core commands:") + print("~ ~~~~~~~~~~~~~~") for cmd in sorted(commands): if not isCore(commands[cmd], play_env): modules_commands.append(cmd) continue if 'HELP' in dir(commands[cmd]) and cmd in commands[cmd].HELP: - print "~ " + cmd + (' ' * (16 - len(cmd))) + commands[cmd].HELP[cmd] + print("~ " + cmd + (' ' * (16 - len(cmd))) + commands[cmd].HELP[cmd]) if len(modules_commands) > 0: - print "~" - print "~ Modules commands:" - print "~ ~~~~~~~~~~~~~~~~~" + print("~") + print("~ Modules commands:") + print("~ ~~~~~~~~~~~~~~~~~") for cmd in modules_commands: if 'HELP' in dir(commands[cmd]) and cmd in commands[cmd].HELP: - print "~ " + cmd + (' ' * (20 - len(cmd))) + commands[cmd].HELP[cmd] - print "~" - print "~ Also refer to documentation at https://www.playframework.com/documentation" - print "~" + print("~ " + cmd + (' ' * (20 - len(cmd))) + commands[cmd].HELP[cmd]) + print("~") + print("~ Also refer to documentation at https://www.playframework.com/documentation") + print("~") def isCore(mod, play_env): path = os.path.realpath(mod.__file__) diff --git a/framework/pym/play/commands/intellij.py b/framework/pym/play/commands/intellij.py index a5f1e675dc..4c61e9c7d6 100644 --- a/framework/pym/play/commands/intellij.py +++ b/framework/pym/play/commands/intellij.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil @@ -66,7 +67,7 @@ def execute(**kargs): replaceAll(iprFile, 
r'%PROJECT_NAME%', application_name) - print "~ OK, the application is ready for Intellij Idea" - print "~ Use File, Open Project... to open \"" + application_name + ".ipr\"" - print "~" + print("~ OK, the application is ready for Intellij Idea") + print("~ Use File, Open Project... to open \"" + application_name + ".ipr\"") + print("~") diff --git a/framework/pym/play/commands/javadoc.py b/framework/pym/play/commands/javadoc.py index dcde0319cc..b366c6dbbf 100644 --- a/framework/pym/play/commands/javadoc.py +++ b/framework/pym/play/commands/javadoc.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import subprocess @@ -19,7 +20,7 @@ def execute(**kargs): args = kargs.get("args") play_env = kargs.get("env") - if not os.environ.has_key('JAVA_HOME'): + if 'JAVA_HOME' not in os.environ: javadoc_path = "javadoc" else: javadoc_path = os.path.normpath("%s/bin/javadoc" % os.environ['JAVA_HOME']) @@ -38,7 +39,7 @@ def execute(**kargs): defineJavadocFiles(app, outdir) javadoc_cmd = [javadoc_path, '@'+os.path.join(outdir,'javadocOptions'), '@'+os.path.join(outdir,'javadocFiles')] - print "Generating Javadoc in " + outdir + "..." + print("Generating Javadoc in " + outdir + "...") return_code = subprocess.call(javadoc_cmd, env=os.environ, stdout=sout, stderr=serr) # Remove configuration file @@ -47,10 +48,10 @@ def execute(**kargs): # Display the status if return_code != 0: - print "Unable to create Javadocs. See " + os.path.join(app.log_path(), 'javadoc.err') + " for errors." + print("Unable to create Javadocs. See " + os.path.join(app.log_path(), 'javadoc.err') + " for errors.") sys.exit(return_code) - print "Done! You can open " + os.path.join(outdir, 'overview-tree.html') + " in your browser." + print("Done! 
You can open " + os.path.join(outdir, 'overview-tree.html') + " in your browser.") @@ -69,21 +70,21 @@ def defineJavadocOptions(app, outdir, args): f.write(' -footer "' + app.readConf('application.name') + '"') if args.count('--links'): - print "~ Build project Javadoc with links to :" + print("~ Build project Javadoc with links to :") args.remove('--links') # Add link to JavaDoc of JAVA javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5"): - print "~ Java(TM) Platform, Platform Standard Edition 5.0" - print "~ Java(TM) EE 5 Specification APIs" + print("~ Java(TM) Platform, Platform Standard Edition 5.0") + print("~ Java(TM) EE 5 Specification APIs") f.write(' -link http://docs.oracle.com/javase/1.5.0/docs/api/') f.write(' -link http://docs.oracle.com/javaee/5/api/') else: urlVersion = javaVersion[2:3] - print "~ Java(TM) Platform, Standard Edition " + urlVersion + " API Specification" - print "~ Java(TM) EE " + urlVersion + " Specification APIs" + print("~ Java(TM) Platform, Standard Edition " + urlVersion + " API Specification") + print("~ Java(TM) EE " + urlVersion + " Specification APIs") f.write(' -link http://docs.oracle.com/javase/' + urlVersion + '/docs/api/') f.write(' -link http://docs.oracle.com/javaee/' + urlVersion + '/api/') @@ -91,10 +92,10 @@ def defineJavadocOptions(app, outdir, args): # Add link to JavaDoc of Play Framework playVersion = app.play_env['version'] if "localbuild" in playVersion: - print "~ API documentation to Play! Framework V" + playVersion + " doesn't exist => link to V" + DEFAULT_API_VERSION + print("~ API documentation to Play! 
Framework V" + playVersion + " doesn't exist => link to V" + DEFAULT_API_VERSION) playVersion = DEFAULT_API_VERSION - print "~ Play Framework V" + playVersion + " API documentation" + print("~ Play Framework V" + playVersion + " API documentation") f.write(' -link https://www.playframework.com/documentation/' + playVersion + '/api/') diff --git a/framework/pym/play/commands/modulesrepo.py b/framework/pym/play/commands/modulesrepo.py index d8de6670fe..c05dbd125d 100644 --- a/framework/pym/play/commands/modulesrepo.py +++ b/framework/pym/play/commands/modulesrepo.py @@ -1,14 +1,20 @@ +from __future__ import print_function +from __future__ import division +from builtins import str +from builtins import input +from builtins import range +from builtins import object import os import subprocess import sys import re import zipfile -import urllib2 +import urllib.request, urllib.error, urllib.parse import shutil import string import imp import time -import urllib +import urllib.request, urllib.parse, urllib.error import yaml from play.utils import * @@ -78,12 +84,12 @@ class Downloader(object): before = .0 history = [] cycles = 0 - average = lambda self: sum(self.history) / (len(self.history) or 1) + average = lambda self: sum(self.history) // (len(self.history) or 1) def __init__(self, width=55): self.width = width - self.kibi = lambda bits: bits / 2 ** 10 - self.proc = lambda a, b: a / (b * 0.01) + self.kibi = lambda bits: bits // (2 ** 10) + self.proc = lambda a, b: a // (b * 0.01) def retrieve(self, url, destination, callback=None): self.size = 0 @@ -92,12 +98,12 @@ def retrieve(self, url, destination, callback=None): headers={'User-Agent':DEFAULT_USER_AGENT, 'Accept': 'application/json' } - req = urllib2.Request(url, headers=headers) - result = urllib2.urlopen(req) + req = urllib.request.Request(url, headers=headers) + result = urllib.request.urlopen(req) self.chunk_read(result, destination, report_hook=self.chunk_report) except KeyboardInterrupt: - print '\n~ Download 
cancelled' - print '~' + print('\n~ Download cancelled') + print('~') for i in range(5): try: os.remove(destination) @@ -107,7 +113,7 @@ def retrieve(self, url, destination, callback=None): else: raise if callback: callback() sys.exit() - print '' + print('') return self.size def chunk_read(self, response, destination, chunk_size=8192, report_hook=None): @@ -151,13 +157,13 @@ def progress(self, bytes_so_far, blocksize, filesize): now = time.clock() elapsed = now-self.before if elapsed: - speed = self.kibi(blocksize * 3 / elapsed) + speed = self.kibi(blocksize * 3 // elapsed) self.history.append(speed) self.history = self.history[-4:] self.before = now - average = round(sum(self.history[-4:]) / 4, 1) + average = round(sum(self.history[-4:]) // 4, 1) self.size = self.kibi(bits) - print '\r~ [%s] %s KiB/s ' % (bar, str(average)), + print('\r~ [%s] %s KiB/s ' % (bar, str(average)), end=' ') def bar(self, bytes_so_far, filesize, done): span = self.width * done * 0.01 @@ -165,7 +171,7 @@ def bar(self, bytes_so_far, filesize, done): result = ('%s of %s KiB (%d%%)' % (self.kibi(bytes_so_far), self.kibi(filesize), done,)).center(self.width) return result.replace(' ', '-', int(span - offset)) -class Unzip: +class Unzip(object): def __init__(self, verbose = False, percent = 10): self.verbose = verbose self.percent = percent @@ -178,12 +184,12 @@ def extract(self, file, dir): self._createstructure(file, dir) num_files = len(zf.namelist()) percent = self.percent - divisions = 100 / percent - perc = int(num_files / divisions) + divisions = 100 // percent + perc = int( num_files / divisions) # extract files to directory structure for i, name in enumerate(zf.namelist()): if self.verbose == True: - print "Extracting %s" % name + print("Extracting %s" % name) elif perc > 0 and (i % perc) == 0 and i > 0: complete = int (i / perc) * percent if not name.endswith('/'): @@ -219,12 +225,12 @@ def _listdirs(self, file): def new(app, args, play_env): if os.path.exists(app.path): - print "~ 
Oops. %s already exists" % app.path - print "~" + print("~ Oops. %s already exists" % app.path) + print("~") sys.exit(-1) - print "~ The new module will be created in %s" % os.path.normpath(app.path) - print "~" + print("~ The new module will be created in %s" % os.path.normpath(app.path)) + print("~") application_name = os.path.basename(app.path) copy_directory(os.path.join(play_env["basedir"], 'resources/module-skel'), app.path) # check_application() @@ -247,25 +253,25 @@ def new(app, args, play_env): os.mkdir(os.path.join(app.path, 'src/play/modules')) os.mkdir(os.path.join(app.path, 'src/play/modules/%s' % application_name)) - print "~ OK, the module is created." - print "~ Start using it by adding it to the dependencies.yml of your project, as decribed in the documentation." - print "~" - print "~ Have fun!" - print "~" + print("~ OK, the module is created.") + print("~ Start using it by adding it to the dependencies.yml of your project, as decribed in the documentation.") + print("~") + print("~ Have fun!") + print("~") def list(app, args): - print "~ You can also browse this list online at:" + print("~ You can also browse this list online at:") for repo in repositories: - print "~ %s/modules" % repo - print "~" + print("~ %s/modules" % repo) + print("~") modules_list = load_module_list() for mod in modules_list: - print "~ [%s]" % mod['name'] - print "~ %s" % mod['fullname'] - print "~ %s/modules/%s" % (mod['server'], mod['name']) + print("~ [%s]" % mod['name']) + print("~ %s" % mod['fullname']) + print("~ %s/modules/%s" % (mod['server'], mod['name'])) vl = '' i = 0 @@ -276,17 +282,17 @@ def list(app, args): vl += ', ' if vl: - print "~ Versions: %s" % vl + print("~ Versions: %s" % vl) else: - print "~ (No versions released yet)" - print "~" + print("~ (No versions released yet)") + print("~") - print "~ To install one of these modules use:" - print "~ play install module-version (eg: play install scala-1.0)" - print "~" - print "~ Or you can just install 
the default release of a module using:" - print "~ play install module (eg: play install scala)" - print "~" + print("~ To install one of these modules use:") + print("~ play install module-version (eg: play install scala-1.0)") + print("~") + print("~ Or you can just install the default release of a module using:") + print("~ play install module (eg: play install scala)") + print("~") def build(app, args, env): @@ -305,9 +311,9 @@ def build(app, args, env): version = a if o in ('--require'): fwkMatch = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) deps_file = os.path.join(app.path, 'conf', 'dependencies.yml') @@ -324,7 +330,7 @@ def build(app, args, env): version = splitted.pop() name = splitted.pop() for dep in deps["require"]: - if isinstance(dep, basestring): + if isinstance(dep, str): splitted = dep.split(" ") if len(splitted) == 2 and splitted[0] == "play": fwkMatch = splitted[1] @@ -334,9 +340,9 @@ def build(app, args, env): if name is None: name = os.path.basename(app.path) if version is None: - version = raw_input("~ What is the module version number? ") + version = input("~ What is the module version number? ") if fwkMatch is None: - fwkMatch = raw_input("~ What are the playframework versions required? ") + fwkMatch = input("~ What are the playframework versions required? ") if os.path.exists(deps_file): f = open(deps_file) @@ -358,11 +364,11 @@ def build(app, args, env): build_file = os.path.join(app.path, 'build.xml') if os.path.exists(build_file): - print "~" - print "~ Building..." - print "~" + print("~") + print("~ Building...") + print("~") status = subprocess.call('ant -f %s -Dplay.path=%s' % (build_file, ftb), shell=True) - print "~" + print("~") if status: sys.exit(status) @@ -404,16 +410,16 @@ def build(app, args, env): except: pass - print "~" - print "~ Done!" 
- print "~ Package is available at %s" % os.path.join(dist_dir, '%s.zip' % mv) - print "~" + print("~") + print("~ Done!") + print("~ Package is available at %s" % os.path.join(dist_dir, '%s.zip' % mv)) + print("~") def install(app, args, env): if len(sys.argv) < 3: help_file = os.path.join(env["basedir"], 'documentation/commands/cmd-install.txt') - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) sys.exit(0) name = cmd = sys.argv[2] @@ -422,7 +428,7 @@ def install(app, args, env): version = groups.group(3) server = None - if args is not None: + if args != None: for param in args: if param.startswith("--force-server="): server = param[15:] @@ -433,48 +439,48 @@ def install(app, args, env): if mod['name'] == module: for v in mod['versions']: if version is None and v['isDefault']: - print '~ Will install %s-%s' % (module, v['version']) - print '~ This module is compatible with: %s' % v['matches'] - ok = raw_input('~ Do you want to install this version (y/n)? ') + print('~ Will install %s-%s' % (module, v['version'])) + print('~ This module is compatible with: %s' % v['matches']) + ok = input('~ Do you want to install this version (y/n)? ') if not ok == 'y': - print '~' + print('~') sys.exit(-1) - print '~ Installing module %s-%s...' % (module, v['version']) + print('~ Installing module %s-%s...' % (module, v['version'])) fetch = '%s/modules/%s-%s.zip' % (mod['server'], module, v['version']) break if version == v['version']: - print '~ Will install %s-%s' % (module, v['version']) - print '~ This module is compatible with: %s' % v['matches'] - ok = raw_input('~ Do you want to install this version (y/n)? ') + print('~ Will install %s-%s' % (module, v['version'])) + print('~ This module is compatible with: %s' % v['matches']) + ok = input('~ Do you want to install this version (y/n)? ') if not ok == 'y': - print '~' + print('~') sys.exit(-1) - print '~ Installing module %s-%s...' % (module, v['version']) + print('~ Installing module %s-%s...' 
% (module, v['version'])) fetch = '%s/modules/%s-%s.zip' % (mod['server'], module, v['version']) break if fetch is None: - print '~ No module found \'%s\'' % name - print '~ Try play list-modules to get the modules list' - print '~' + print('~ No module found \'%s\'' % name) + print('~ Try play list-modules to get the modules list') + print('~') sys.exit(-1) archive = os.path.join(env["basedir"], 'modules/%s-%s.zip' % (module, v['version'])) if os.path.exists(archive): os.remove(archive) - print '~' - print '~ Fetching %s' % fetch + print('~') + print('~ Fetching %s' % fetch) Downloader().retrieve(fetch, archive) if not os.path.exists(archive): - print '~ Oops, file does not exist' - print '~' + print('~ Oops, file does not exist') + print('~') sys.exist(-1) - print '~ Unzipping...' + print('~ Unzipping...') if os.path.exists(os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))): shutil.rmtree(os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))) @@ -482,13 +488,13 @@ def install(app, args, env): Unzip().extract(archive, os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))) os.remove(archive) - print '~' - print '~ Module %s-%s is installed!' % (module, v['version']) - print '~ You can now use it by adding it to the dependencies.yml file:' - print '~' - print '~ require:' - print '~ play -> %s %s' % (module, v['version']) - print '~' + print('~') + print('~ Module %s-%s is installed!' 
% (module, v['version'])) + print('~ You can now use it by adding it to the dependencies.yml file:') + print('~') + print('~ require:') + print('~ play -> %s %s' % (module, v['version'])) + print('~') sys.exit(0) @@ -501,20 +507,20 @@ def add(app, args, env): for o, a in optlist: if o in ('--module'): m = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) if m is None: - print "~ Usage: play add --module=" - print "~ " + print("~ Usage: play add --module=") + print("~ ") sys.exit(-1) appConf = os.path.join(app.path, 'conf/application.conf') if not fileHas(appConf, '# ---- MODULES ----'): - print "~ Line '---- MODULES ----' missing in your application.conf. Add it to use this command." - print "~ " + print("~ Line '---- MODULES ----' missing in your application.conf. Add it to use this command.") + print("~ ") sys.exit(-1) mn = m @@ -522,13 +528,13 @@ def add(app, args, env): mn = mn[:mn.find('-')] if mn in app.module_names(): - print "~ Module %s already declared in application.conf, not doing anything." % mn - print "~ " + print("~ Module %s already declared in application.conf, not doing anything." % mn) + print("~ ") sys.exit(-1) replaceAll(appConf, r'# ---- MODULES ----', '# ---- MODULES ----\nmodule.%s=${play.path}/modules/%s' % (mn, m) ) - print "~ Module %s add to application %s." % (mn, app.name()) - print "~ " + print("~ Module %s add to application %s." 
% (mn, app.name())) + print("~ ") def load_module_list(custom_server=None): @@ -543,7 +549,7 @@ def any(arr, func): return False modules = None - if custom_server is not None: + if custom_server != None: rev = [custom_server] else: rev = repositories[:] # clone @@ -552,7 +558,7 @@ def any(arr, func): for repo in rev: result = load_modules_from(repo) if modules is None: - modules = map(lambda m: addServer(m, repo), result['modules']) + modules = [addServer(m, repo) for m in result['modules']] else: for module in result['modules']: if not any(modules, lambda m: m['name'] == module['name']): @@ -566,17 +572,17 @@ def load_modules_from(modules_server): headers={'User-Agent':DEFAULT_USER_AGENT, 'Accept': 'application/json' } - req = urllib2.Request(url, headers=headers) - result = urllib2.urlopen(req) + req = urllib.request.Request(url, headers=headers) + result = urllib.request.urlopen(req) return json.loads(result.read()) - except urllib2.HTTPError, e: - print "~ Oops," - print "~ Cannot fetch the modules list from %s (%s)..." % (url, e.code) - print e.reason - print "~" + except urllib.error.HTTPError as e: + print("~ Oops,") + print("~ Cannot fetch the modules list from %s (%s)..." % (url, e.code)) + print(e.reason) + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Oops," - print "~ Cannot fetch the modules list from %s ..." % (url) - print "~" + except urllib.error.URLError as e: + print("~ Oops,") + print("~ Cannot fetch the modules list from %s ..." 
% (url)) + print("~") sys.exit(-1) diff --git a/framework/pym/play/commands/netbeans.py b/framework/pym/play/commands/netbeans.py index f0ce397ed7..cca700f087 100644 --- a/framework/pym/play/commands/netbeans.py +++ b/framework/pym/play/commands/netbeans.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import time @@ -43,8 +44,8 @@ def execute(**kargs): if not re.search("\.[svn|git|hg|scc|vssscc]", dir): mr = '%s' % (dir, dir) replaceAll(os.path.join(nbproject, 'project.xml'), r'%MORE%', mr) - print "~ OK, the application is ready for netbeans" - print "~ Just open %s as a netbeans project" % os.path.normpath(app.path) - print "~" - print "~ Use netbeansify again when you want to update netbeans configuration files, then close and open you project again." - print "~" + print("~ OK, the application is ready for netbeans") + print("~ Just open %s as a netbeans project" % os.path.normpath(app.path)) + print("~") + print("~ Use netbeansify again when you want to update netbeans configuration files, then close and open you project again.") + print("~") diff --git a/framework/pym/play/commands/precompile.py b/framework/pym/play/commands/precompile.py index 9b3c1e1784..9064d79af3 100644 --- a/framework/pym/play/commands/precompile.py +++ b/framework/pym/play/commands/precompile.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import subprocess @@ -26,6 +27,6 @@ def execute(**kargs): try: return subprocess.call(java_cmd, env=os.environ) except OSError: - print "~ Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " - print "~ " + print("~ Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") + print("~ ") sys.exit(-1) diff --git a/framework/pym/play/commands/secret.py b/framework/pym/play/commands/secret.py index a5237d24bd..a67fe733fe 100644 --- a/framework/pym/play/commands/secret.py +++ b/framework/pym/play/commands/secret.py @@ -1,3 +1,4 @@ +from __future__ import print_function from play.utils import * COMMANDS = ['secret'] @@ -10,8 +11,8 @@ def execute(**kargs): app = kargs.get("app") app.check() - print "~ Generating the secret key..." + print("~ Generating the secret key...") sk = secretKey() replaceAll(os.path.join(app.path, 'conf', 'application.conf'), r'application.secret=.*', 'application.secret=%s' % sk, True) - print "~ Keep the secret : %s" % sk - print "~" + print("~ Keep the secret : %s" % sk) + print("~") diff --git a/framework/pym/play/commands/status.py b/framework/pym/play/commands/status.py index a053d8ba6a..538b190d6c 100644 --- a/framework/pym/play/commands/status.py +++ b/framework/pym/play/commands/status.py @@ -1,7 +1,9 @@ +from __future__ import print_function +from builtins import str import os, os.path import shutil import getopt -import urllib2 +import urllib.request, urllib.error, urllib.parse from play.utils import * @@ -29,9 +31,9 @@ def execute(**kargs): url = a + '/@status' if o in ('--secret'): secret_key = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) if not url or not secret_key: @@ -43,21 +45,21 @@ def execute(**kargs): secret_key = app.readConf('application.statusKey') try: - proxy_handler = urllib2.ProxyHandler({}) - req = urllib2.Request(url) + proxy_handler = urllib.request.ProxyHandler({}) + req = urllib.request.Request(url) req.add_header('Authorization', secret_key) - opener = urllib2.build_opener(proxy_handler) + opener = urllib.request.build_opener(proxy_handler) status = opener.open(req) - print '~ Status from %s,' % url - print '~' - print status.read() - print '~' - except 
urllib2.HTTPError, e: - print "~ Cannot retrieve the application status... (%s)" % (e.code) - print "~" + print('~ Status from %s,' % url) + print('~') + print(status.read()) + print('~') + except urllib.error.HTTPError as e: + print("~ Cannot retrieve the application status... (%s)" % (e.code)) + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Cannot contact the application..." - print "~" + except urllib.error.URLError as e: + print("~ Cannot contact the application...") + print("~") sys.exit(-1) - print + print() diff --git a/framework/pym/play/commands/test.py b/framework/pym/play/commands/test.py index f956c11072..2288e2819d 100644 --- a/framework/pym/play/commands/test.py +++ b/framework/pym/play/commands/test.py @@ -1,3 +1,4 @@ +from __future__ import print_function import sys import subprocess @@ -21,17 +22,17 @@ def execute(**kargs): def test(app, args): app.check() java_cmd = app.java_cmd(args) - print "~ Running in test mode" - print "~ Ctrl+C to stop" - print "~ " + print("~ Running in test mode") + print("~ Ctrl+C to stop") + print("~ ") try: return_code = subprocess.call(java_cmd, env=os.environ) if 0 != return_code: sys.exit(return_code) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") sys.exit(-1) - print "~ " + print("~ ") diff --git a/framework/pym/play/commands/version.py b/framework/pym/play/commands/version.py index 309d6e187c..83512cb01f 100644 --- a/framework/pym/play/commands/version.py +++ b/framework/pym/play/commands/version.py @@ -1,3 +1,4 @@ +from __future__ import print_function COMMANDS = ['version'] @@ -11,4 +12,4 @@ def execute(**kargs): # If we've shown the logo, then the version has already been printed if not showLogo: - print env["version"] + print(env["version"]) diff --git a/framework/pym/play/commands/war.py b/framework/pym/play/commands/war.py index cf69f33087..35eac30f7c 100644 --- a/framework/pym/play/commands/war.py +++ b/framework/pym/play/commands/war.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import str import sys import os import getopt @@ -32,50 +34,50 @@ def execute(**kargs): war_zip_path = war_path + '.war' if o in ('--exclude'): war_exclusion_list = a.split(':') - print "~ Excluding these directories :" + print("~ Excluding these directories :") for excluded in war_exclusion_list: - print "~ %s" %excluded - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ Please specify a path where to generate the WAR, using the -o or --output option." - print "~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp)." - print "~ " + print("~ %s" %excluded) + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ Please specify a path where to generate the WAR, using the -o or --output option.") + print("~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp).") + print("~ ") sys.exit(-1) if not war_path: - print "~ Oops. Please specify a path where to generate the WAR, using the -o or --output option" - print "~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. 
Please specify a path where to generate the WAR, using the -o or --output option") + print("~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) if os.path.exists(war_path) and not os.path.exists(os.path.join(war_path, 'WEB-INF')): - print "~ Oops. The destination path already exists but does not seem to host a valid WAR structure" - print "~" + print("~ Oops. The destination path already exists but does not seem to host a valid WAR structure") + print("~") sys.exit(-1) if isParentOf(app.path, war_path) and not isExcluded(war_path, war_exclusion_list): - print "~ Oops. Please specify a destination directory outside of the application" - print "~ or exclude war destination directory using the --exclude option and ':'-separator " - print "~ (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. Please specify a destination directory outside of the application") + print("~ or exclude war destination directory using the --exclude option and ':'-separator ") + print("~ (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) # Precompile first precompilation_result = play.commands.precompile.execute(command=command, app=app, args=args, env=env) if precompilation_result != 0: - print "~ Please fix compilation errors before packaging WAR" - print "~" + print("~ Please fix compilation errors before packaging WAR") + print("~") sys.exit(precompilation_result) # Package package_as_war(app, env, war_path, war_zip_path, war_exclusion_list) - print "~ Done !" - print "~" - print "~ You can now load %s as a standard WAR into your servlet container" % (os.path.normpath(war_path)) - print "~ You can't use play standard commands to run/stop/debug the WAR application..." - print "~ ... just use your servlet container commands instead" - print "~" - print "~ Have fun!" 
- print "~" + print("~ Done !") + print("~") + print("~ You can now load %s as a standard WAR into your servlet container" % (os.path.normpath(war_path))) + print("~ You can't use play standard commands to run/stop/debug the WAR application...") + print("~ ... just use your servlet container commands instead") + print("~") + print("~ Have fun!") + print("~") diff --git a/framework/pym/play/utils.py b/framework/pym/play/utils.py index 88866ba1f7..d695c9b225 100644 --- a/framework/pym/play/utils.py +++ b/framework/pym/play/utils.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range import sys import os, os.path import re @@ -75,8 +78,8 @@ def getWithModules(args, env): dirname = os.path.join(env["basedir"], 'modules/%s' % f) break if not dirname: - print "~ Oops. Module " + m + " not found (try running `play install " + m + "`)" - print "~" + print("~ Oops. Module " + m + " not found (try running `play install " + m + "`)") + print("~") sys.exit(-1) md.append(dirname) @@ -91,23 +94,23 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): classpath = app.getClasspath() if not war_path: - print "~ Oops. Please specify a path where to generate the WAR, using the -o or --output option" - print "~" + print("~ Oops. Please specify a path where to generate the WAR, using the -o or --output option") + print("~") sys.exit(-1) if os.path.exists(war_path) and not os.path.exists(os.path.join(war_path, 'WEB-INF')): - print "~ Oops. The destination path already exists but does not seem to host a valid WAR structure" - print "~" + print("~ Oops. The destination path already exists but does not seem to host a valid WAR structure") + print("~") sys.exit(-1) if isParentOf(app.path, war_path) and not isExcluded(war_path, war_exclusion_list): - print "~ Oops. 
Please specify a destination directory outside of the application" - print "~ or exclude war destination directory using the --exclude option and ':'-separator " - print "~ (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. Please specify a destination directory outside of the application") + print("~ or exclude war destination directory using the --exclude option and ':'-separator ") + print("~ (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) - print "~ Packaging current version of the framework and the application to %s ..." % (os.path.normpath(war_path)) + print("~ Packaging current version of the framework and the application to %s ..." % (os.path.normpath(war_path))) if os.path.exists(war_path): shutil.rmtree(war_path) if os.path.exists(os.path.join(app.path, 'war')): copy_directory(os.path.join(app.path, 'war'), war_path) @@ -118,7 +121,7 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): shutil.copyfile(os.path.join(env["basedir"], 'resources/war/web.xml'), os.path.join(war_path, 'WEB-INF/web.xml')) application_name = app.readConf('application.name') replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%APPLICATION_NAME%', application_name) - if env["id"] is not "": + if env["id"] != "": replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%PLAY_ID%', env["id"]) else: replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%PLAY_ID%', 'war') @@ -167,7 +170,7 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): shutil.copyfile(os.path.join(env["basedir"], 'resources/messages'), os.path.join(war_path, 'WEB-INF/resources/messages')) if war_zip_path: - print "~ Creating zipped archive to %s ..." % (os.path.normpath(war_zip_path)) + print("~ Creating zipped archive to %s ..." 
% (os.path.normpath(war_zip_path))) if os.path.exists(war_zip_path): os.remove(war_zip_path) zip = zipfile.ZipFile(war_zip_path, 'w', zipfile.ZIP_STORED) @@ -242,7 +245,7 @@ def isTestFrameworkId( framework_id ): return (framework_id == 'test' or (framework_id.startswith('test-') and framework_id.__len__() >= 6 )) def java_path(): - if not os.environ.has_key('JAVA_HOME'): + if 'JAVA_HOME' not in os.environ: return "java" else: return os.path.normpath("%s/bin/java" % os.environ['JAVA_HOME']) @@ -257,5 +260,5 @@ def getJavaVersion(): if result: return result.group(1) else: - print "Unable to retrieve java version from " + javaVersion + print("Unable to retrieve java version from " + javaVersion) return "" diff --git a/framework/pym/simplejson/__init__.py b/framework/pym/simplejson/__init__.py index d5b4d39913..2e3b838ee8 100644 --- a/framework/pym/simplejson/__init__.py +++ b/framework/pym/simplejson/__init__.py @@ -5,24 +5,23 @@ :mod:`simplejson` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained version of the :mod:`json` library contained in Python 2.6, but maintains -compatibility with Python 2.4 and Python 2.5 and (currently) has -significant performance advantages, even without using the optional C -extension for speedups. +compatibility back to Python 2.5 and (currently) has significant performance +advantages, even without using the optional C extension for speedups. 
Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -31,14 +30,14 @@ Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')) { "4": 5, "6": 7 @@ -52,7 +51,7 @@ True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -68,8 +67,8 @@ >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) - >>> import decimal - >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') True Specializing JSON object encoding:: @@ -78,7 +77,8 @@ >>> def encode_complex(obj): ... if isinstance(obj, complex): ... return [obj.real, obj.imag] - ... raise TypeError(repr(o) + " is not JSON serializable") + ... 
raise TypeError('Object of type %s is not JSON serializable' % + ... obj.__class__.__name__) ... >>> json.dumps(2 + 1j, default=encode_complex) '[2.0, 1.0]' @@ -87,7 +87,6 @@ >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) '[2.0, 1.0]' - Using simplejson.tool from the shell to validate and pretty-print:: $ echo '{"json":"obj"}' | python -m simplejson.tool @@ -95,18 +94,60 @@ "json": "obj" } $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) + Expecting property name: line 1 column 3 (char 2) + +Parsing multiple documents serialized as JSON lines (newline-delimited JSON):: + + >>> import simplejson as json + >>> def loads_lines(docs): + ... for doc in docs.splitlines(): + ... yield json.loads(doc) + ... + >>> sum(doc["count"] for doc in loads_lines('{"count":1}\n{"count":2}\n{"count":3}\n')) + 6 + +Serializing multiple objects to JSON lines (newline-delimited JSON):: + + >>> import simplejson as json + >>> def dumps_lines(objs): + ... for obj in objs: + ... yield json.dumps(obj, separators=(',',':')) + '\n' + ... + >>> ''.join(dumps_lines([{'count': 1}, {'count': 2}, {'count': 3}])) + '{"count":1}\n{"count":2}\n{"count":3}\n' + """ -__version__ = '2.0.9' +from __future__ import absolute_import +__version__ = '3.17.2' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'OrderedDict', 'simple_first', 'RawJSON' ] __author__ = 'Bob Ippolito ' -from decoder import JSONDecoder -from encoder import JSONEncoder +from decimal import Decimal + +from .errors import JSONDecodeError +from .raw_json import RawJSON +from .decoder import JSONDecoder +from .encoder import JSONEncoder, JSONEncoderForHTML +def _import_OrderedDict(): + import collections + try: + return collections.OrderedDict + except AttributeError: + from . 
import ordered_dict + return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): + try: + from ._speedups import make_encoder + return make_encoder + except ImportError: + return None _default_encoder = JSONEncoder( skipkeys=False, @@ -117,56 +158,115 @@ separators=None, encoding='utf-8', default=None, + use_decimal=True, + namedtuple_as_object=True, + tuple_as_array=True, + iterable_as_array=False, + bigint_as_string=False, + item_sort_key=None, + for_json=False, + ignore_nan=False, + int_as_string_bitcount=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + If *skipkeys* is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` - may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter()``) this is likely - to cause an error. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). 
+ If *ensure_ascii* is false (default: ``True``), then the output may + contain non-ASCII characters, so long as they do not need to be escaped + by JSON. When it is true, all non-ASCII characters are escaped. - If ``allow_nan`` is false, then it will be a ``ValueError`` to + If *allow_nan* is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + in strict compliance of the original JSON specification, instead of using + the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + *ignore_nan* for ECMA-262 compliant behavior. - If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. + If *indent* is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, *separators* should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + *encoding* is the character encoding for str instances, default is UTF-8. - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. 
The default simply raises TypeError. + *default(obj)* is a function that should return a serializable version + of obj or raise ``TypeError``. The default simply raises ``TypeError``. + + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. 
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -174,7 +274,17 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) + default=default, use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, + **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: @@ -182,17 +292,21 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + 
namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + (``str``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the return value will be a - ``unicode`` instance subject to normal Python ``str`` to ``unicode`` - coercion rules instead of being escaped to an ASCII ``str``. + If *ensure_ascii* is false (default: ``True``), then the output may + contain non-ASCII characters, so long as they do not need to be escaped + by JSON. When it is true, all non-ASCII characters are escaped. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -203,30 +317,81 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. + If ``indent`` is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. 
- If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, ``separators`` should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + ``encoding`` is the character encoding for bytes instances, default is + UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*.
+ + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing + whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -234,85 +399,186 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, default=default, + use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).encode(obj) -_default_decoder = 
JSONDecoder(encoding=None, object_hook=None) +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, + **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. - - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). + a JSON document as `str` or `bytes`) to a Python object. + + *encoding* determines the encoding used to interpret any + `bytes` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding `str` objects. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. 
This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ return loads(fp.read(), encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + use_decimal=use_decimal, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. 
- If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *encoding* determines the encoding used to interpret any + :class:`bytes` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. 
This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. 
""" if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and not kw): + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. 
+ """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/framework/pym/simplejson/_speedups.c b/framework/pym/simplejson/_speedups.c deleted file mode 100644 index 23b5f4a6e6..0000000000 --- a/framework/pym/simplejson/_speedups.c +++ /dev/null @@ -1,2329 +0,0 @@ -#include "Python.h" -#include "structmember.h" -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) -typedef int Py_ssize_t; -#define PY_SSIZE_T_MAX INT_MAX -#define PY_SSIZE_T_MIN INT_MIN -#define PyInt_FromSsize_t PyInt_FromLong -#define PyInt_AsSsize_t PyInt_AsLong -#endif -#ifndef Py_IS_FINITE -#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) -#endif - -#ifdef __GNUC__ -#define UNUSED __attribute__((__unused__)) -#else -#define UNUSED -#endif - -#define DEFAULT_ENCODING "utf-8" - -#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) -#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) -#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) -#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) - -static PyTypeObject PyScannerType; -static PyTypeObject PyEncoderType; - -typedef struct _PyScannerObject { - PyObject_HEAD - PyObject *encoding; - PyObject *strict; - PyObject *object_hook; - PyObject *parse_float; - PyObject *parse_int; - PyObject *parse_constant; -} PyScannerObject; - -static PyMemberDef scanner_members[] = { - {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, - {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, - {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, - {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, - {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, - {"parse_constant", T_OBJECT, 
offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, - {NULL} -}; - -typedef struct _PyEncoderObject { - PyObject_HEAD - PyObject *markers; - PyObject *defaultfn; - PyObject *encoder; - PyObject *indent; - PyObject *key_separator; - PyObject *item_separator; - PyObject *sort_keys; - PyObject *skipkeys; - int fast_encode; - int allow_nan; -} PyEncoderObject; - -static PyMemberDef encoder_members[] = { - {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, - {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, - {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, - {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, - {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, - {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, - {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, - {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, - {NULL} -}; - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); -static PyObject * -ascii_escape_unicode(PyObject *pystr); -static PyObject * -ascii_escape_str(PyObject *pystr); -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -scanner_dealloc(PyObject *self); -static int 
-scanner_clear(PyObject *self); -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -encoder_dealloc(PyObject *self); -static int -encoder_clear(PyObject *self); -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); -static PyObject * -_encoded_const(PyObject *const); -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj); -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj); - -#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') -#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) - -#define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) -#else -#define MAX_EXPANSION MIN_EXPANSION -#endif - -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) -{ - /* PyObject to Py_ssize_t converter */ - *size_ptr = PyInt_AsSsize_t(o); - if (*size_ptr == -1 && PyErr_Occurred()); - return 1; - return 0; -} - -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) -{ - /* Py_ssize_t to PyObject converter */ - return PyInt_FromSsize_t(*size_ptr); -} - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) -{ - /* Escape unicode code point c to ASCII escape sequences - in char *output. 
output must have at least 12 bytes unused to - accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - } - return chars; -} - -static PyObject * -ascii_escape_unicode(PyObject *pystr) -{ - /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t max_output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - Py_UNICODE *input_unicode; - - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); - - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - chars = 0; - output[chars++] = '"'; - for (i = 0; i < input_chars; i++) { - 
Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static PyObject * -ascii_escape_str(PyObject *pystr) -{ - /* Take a PyString pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - char *input_str; - - input_chars = PyString_GET_SIZE(pystr); - input_str = PyString_AS_STRING(pystr); - - /* Fast path for a string that's already ASCII */ - for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } - } - break; - } - } - - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + 
input_chars; - } - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - output[0] = '"'; - - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); - - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) -{ - /* Use the Python function simplejson.decoder.errmsg to raise a nice - looking ValueError exception */ - static PyObject *errmsg_fn = NULL; - PyObject *pymsg; - if (errmsg_fn == NULL) { - PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); - if (decoder == NULL) - return; - errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); - Py_DECREF(decoder); - if (errmsg_fn == NULL) - return; - } - pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); - if (pymsg) { - PyErr_SetObject(PyExc_ValueError, pymsg); - Py_DECREF(pymsg); - } -} - -static PyObject * -join_list_unicode(PyObject *lst) -{ - /* return u''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - 
return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -join_list_string(PyObject *lst) -{ - /* return ''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyString_FromStringAndSize(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { - /* return (rval, idx) tuple, stealing reference to rval */ - PyObject *tpl; - PyObject *pyidx; - /* - steal a reference to rval, returns (rval, idx) - */ - if (rval == NULL) { - return NULL; - } - pyidx = PyInt_FromSsize_t(idx); - if (pyidx == NULL) { - Py_DECREF(rval); - return NULL; - } - tpl = PyTuple_New(2); - if (tpl == NULL) { - Py_DECREF(pyidx); - Py_DECREF(rval); - return NULL; - } - PyTuple_SET_ITEM(tpl, 0, rval); - PyTuple_SET_ITEM(tpl, 1, pyidx); - return tpl; -} - -static PyObject * -scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyString pystr. - end is the index of the first character after the quote. 
- encoding is the encoding of pystr (must be an ASCII superset) - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyString (if ASCII-only) or PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyString_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - int has_unicode = 0; - char *buf = PyString_AS_STRING(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; - for (next = end; next < len; next++) { - c = (unsigned char)buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - else if (c > 0x7f) { - has_unicode = 1; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); - if (strchunk == NULL) { - goto bail; - } - if (has_unicode) { - chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); - Py_DECREF(strchunk); - if (chunk == NULL) { - goto bail; - } - } - else { - chunk = strchunk; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': 
c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - 
raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - if (c > 0x7f) { - has_unicode = 1; - } - if (has_unicode) { - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - } - else { - char c_char = Py_CHARMASK(c); - chunk = PyString_FromStringAndSize(&c_char, 1); - if (chunk == NULL) { - goto bail; - } - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - - rval = join_list_string(chunks); - if (rval == NULL) { - goto bail; - } - Py_CLEAR(chunks); - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunks); - return NULL; -} - - -static PyObject * -scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyUnicode pystr. - end is the index of the first character after the quote. - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; - for (next = end; next < len; next++) { - c = buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - chunk = 
PyUnicode_FromUnicode(&buf[end], next - end); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': 
case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; - } - Py_DECREF(chunks); - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunks); - return NULL; -} - -PyDoc_STRVAR(pydoc_scanstring, - "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" - "\n" - "Scan the string s for a JSON string. End is the index of the\n" - "character in s after the quote that started the JSON string.\n" - "Unescapes all valid JSON string escape sequences and raises ValueError\n" - "on attempt to decode an invalid string. If strict is False then literal\n" - "control characters are allowed in the string.\n" - "\n" - "Returns a tuple of the decoded string and the index of the character in s\n" - "after the end quote." 
-); - -static PyObject * -py_scanstring(PyObject* self UNUSED, PyObject *args) -{ - PyObject *pystr; - PyObject *rval; - Py_ssize_t end; - Py_ssize_t next_end = -1; - char *encoding = NULL; - int strict = 1; - if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { - return NULL; - } - if (encoding == NULL) { - encoding = DEFAULT_ENCODING; - } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { - rval = scanstring_unicode(pystr, end, strict, &next_end); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - return _build_rval_index_tuple(rval, next_end); -} - -PyDoc_STRVAR(pydoc_encode_basestring_ascii, - "encode_basestring_ascii(basestring) -> str\n" - "\n" - "Return an ASCII-only JSON representation of a Python string" -); - -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) -{ - /* Return an ASCII-only JSON representation of a Python string */ - /* METH_O */ - if (PyString_Check(pystr)) { - return ascii_escape_str(pystr); - } - else if (PyUnicode_Check(pystr)) { - return ascii_escape_unicode(pystr); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } -} - -static void -scanner_dealloc(PyObject *self) -{ - /* Deallocate scanner object */ - scanner_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -scanner_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_VISIT(s->encoding); - Py_VISIT(s->strict); - Py_VISIT(s->object_hook); - Py_VISIT(s->parse_float); - Py_VISIT(s->parse_int); - Py_VISIT(s->parse_constant); - return 0; -} - -static int -scanner_clear(PyObject *self) -{ - PyScannerObject *s; - 
assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - return 0; -} - -static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyString pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. - - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval = PyDict_New(); - PyObject *key = NULL; - PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); - int strict = PyObject_IsTrue(s->strict); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); - if (key == NULL) - goto bail; - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON data type */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); - idx = next_idx; - - /* skip 
whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - /* if object_hook is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(key); - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyUnicode pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. 
- - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyDict_New(); - PyObject *key = NULL; - int strict = PyObject_IsTrue(s->strict); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); - if (key == NULL) - goto bail; - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - - /* if object_hook 
is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(key); - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term and de-tuplefy the (rval, idx) */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = 
idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON constant from PyString pystr. 
- constant is the constant string that was found - ("NaN", "Infinity", "-Infinity"). - idx is the index of the first character of the constant - *next_idx_ptr is a return-by-reference index to the first character after - the constant. - - Returns the result of parse_constant - */ - PyObject *cstr; - PyObject *rval; - /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); - if (cstr == NULL) - return NULL; - - /* rval = parse_constant(constant) */ - rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); - Py_DECREF(cstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyString pystr. - idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' 
followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - - /* save the index of the 'e' or 'E' just in case we need to backtrack */ - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyString_FromStringAndSize(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); - } - } - else { - /* parse as an int using a fast path if available, otherwise call user defined method */ - if (s->parse_int != (PyObject *)&PyInt_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - else { - rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); - } - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyUnicode pystr. 
- idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. 
if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromString(numstr, NULL); - } - } - else { - /* no fast path for unicode -> int, just call */ - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyString pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. 
- */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t length = PyString_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. 
Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyUnicode pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_unicode(pystr, idx + 1, - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return 
_parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scanner_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to scan_once_{str,unicode} */ - PyObject *pystr; - PyObject *rval; - Py_ssize_t idx; - Py_ssize_t next_idx = -1; - static char *kwlist[] = {"string", "idx", NULL}; - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) - return NULL; - - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { - rval = scan_once_unicode(s, pystr, idx, &next_idx); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - return _build_rval_index_tuple(rval, next_idx); -} - -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyScannerObject *s; - s = (PyScannerObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->encoding = NULL; - s->strict = NULL; - s->object_hook = NULL; - s->parse_float = NULL; - s->parse_int = NULL; - s->parse_constant = NULL; - } - return (PyObject *)s; -} - -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Initialize Scanner object */ - PyObject *ctx; - static char *kwlist[] = {"context", NULL}; - PyScannerObject *s; - - assert(PyScanner_Check(self)); - s = 
(PyScannerObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) - return -1; - - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); - } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL || !PyString_Check(s->encoding)) - goto bail; - - /* All of these will fail "gracefully" so we don't need to verify them */ - s->strict = PyObject_GetAttrString(ctx, "strict"); - if (s->strict == NULL) - goto bail; - s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); - if (s->object_hook == NULL) - goto bail; - s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); - if (s->parse_float == NULL) - goto bail; - s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); - if (s->parse_int == NULL) - goto bail; - s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); - if (s->parse_constant == NULL) - goto bail; - - return 0; - -bail: - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - return -1; -} - -PyDoc_STRVAR(scanner_doc, "JSON scanner object"); - -static -PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Scanner", /* tp_name */ - sizeof(PyScannerObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - scanner_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - scanner_call, /* tp_call */ - 0, /* tp_str */ - 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ - 0,/* 
PyObject_GenericSetAttr, */ /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - scanner_doc, /* tp_doc */ - scanner_traverse, /* tp_traverse */ - scanner_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - scanner_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - scanner_init, /* tp_init */ - 0,/* PyType_GenericAlloc, */ /* tp_alloc */ - scanner_new, /* tp_new */ - 0,/* PyObject_GC_Del, */ /* tp_free */ -}; - -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyEncoderObject *s; - s = (PyEncoderObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->markers = NULL; - s->defaultfn = NULL; - s->encoder = NULL; - s->indent = NULL; - s->key_separator = NULL; - s->item_separator = NULL; - s->sort_keys = NULL; - s->skipkeys = NULL; - } - return (PyObject *)s; -} - -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; - - PyEncoderObject *s; - PyObject *allow_nan; - - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, - &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) - return -1; - - Py_INCREF(s->markers); - Py_INCREF(s->defaultfn); - Py_INCREF(s->encoder); - Py_INCREF(s->indent); - Py_INCREF(s->key_separator); - Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); - Py_INCREF(s->skipkeys); - s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == 
(PyCFunction)py_encode_basestring_ascii); - s->allow_nan = PyObject_IsTrue(allow_nan); - return 0; -} - -static PyObject * -encoder_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to encode_listencode_obj */ - static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj; - PyObject *rval; - Py_ssize_t indent_level; - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, - &obj, _convertPyInt_AsSsize_t, &indent_level)) - return NULL; - rval = PyList_New(0); - if (rval == NULL) - return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); - return NULL; - } - return rval; -} - -static PyObject * -_encoded_const(PyObject *obj) -{ - /* Return the JSON string representation of None, True, False */ - if (obj == Py_None) { - static PyObject *s_null = NULL; - if (s_null == NULL) { - s_null = PyString_InternFromString("null"); - } - Py_INCREF(s_null); - return s_null; - } - else if (obj == Py_True) { - static PyObject *s_true = NULL; - if (s_true == NULL) { - s_true = PyString_InternFromString("true"); - } - Py_INCREF(s_true); - return s_true; - } - else if (obj == Py_False) { - static PyObject *s_false = NULL; - if (s_false == NULL) { - s_false = PyString_InternFromString("false"); - } - Py_INCREF(s_false); - return s_false; - } - else { - PyErr_SetString(PyExc_ValueError, "not a const"); - return NULL; - } -} - -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a PyFloat */ - double i = PyFloat_AS_DOUBLE(obj); - if (!Py_IS_FINITE(i)) { - if (!s->allow_nan) { - PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); - return NULL; - } - if (i > 0) { - return PyString_FromString("Infinity"); - } - else if (i < 0) { - return PyString_FromString("-Infinity"); - } - else { - return 
PyString_FromString("NaN"); - } - } - /* Use a better float format here? */ - return PyObject_Repr(obj); -} - -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a string */ - if (s->fast_encode) - return py_encode_basestring_ascii(NULL, obj); - else - return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); -} - -static int -_steal_list_append(PyObject *lst, PyObject *stolen) -{ - /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); - Py_DECREF(stolen); - return rval; -} - -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) -{ - /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - 
PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; - } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; - } - } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; - } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { - Py_XDECREF(ident); - return -1; - } - Py_XDECREF(ident); - } - return rv; - } -} - -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) -{ - /* Encode Python dict dct a JSON term, rval is a PyList */ - static PyObject *open_dict = NULL; - static PyObject *close_dict = NULL; - static PyObject *empty_dict = NULL; - PyObject *kstr = NULL; - PyObject *ident = NULL; - PyObject *key, *value; - Py_ssize_t pos; - int skipkeys; - Py_ssize_t idx; - - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) - return -1; - } - if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(dct); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, dct)) { - goto bail; - } - } - - if (PyList_Append(rval, open_dict)) - goto bail; - - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + 
newline_indent - buf += newline_indent - */ - } - - /* TODO: C speedup not implemented for sort_keys */ - - pos = 0; - skipkeys = PyObject_IsTrue(s->skipkeys); - idx = 0; - while (PyDict_Next(dct, &pos, &key, &value)) { - PyObject *encoded; - - if (PyString_Check(key) || PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); - if (kstr == NULL) - goto bail; - } - else if (skipkeys) { - continue; - } - else { - /* TODO: include repr of key */ - PyErr_SetString(PyExc_ValueError, "keys must be a string"); - goto bail; - } - - if (idx) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - - encoded = encoder_encode_string(s, kstr); - Py_CLEAR(kstr); - if (encoded == NULL) - goto bail; - if (PyList_Append(rval, encoded)) { - Py_DECREF(encoded); - goto bail; - } - Py_DECREF(encoded); - if (PyList_Append(rval, s->key_separator)) - goto bail; - if (encoder_listencode_obj(s, rval, value, indent_level)) - goto bail; - idx += 1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_dict)) - goto bail; - return 0; - -bail: - Py_XDECREF(kstr); - Py_XDECREF(ident); - return -1; -} - - -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) -{ - /* Encode Python list seq to a JSON term, rval is a PyList */ - static PyObject *open_array = NULL; - static PyObject *close_array = NULL; - static PyObject *empty_array = NULL; - PyObject *ident = NULL; - PyObject *s_fast 
= NULL; - Py_ssize_t num_items; - PyObject **seq_items; - Py_ssize_t i; - - if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); - if (open_array == NULL || close_array == NULL || empty_array == NULL) - return -1; - } - ident = NULL; - s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); - if (s_fast == NULL) - return -1; - num_items = PySequence_Fast_GET_SIZE(s_fast); - if (num_items == 0) { - Py_DECREF(s_fast); - return PyList_Append(rval, empty_array); - } - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(seq); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, seq)) { - goto bail; - } - } - - seq_items = PySequence_Fast_ITEMS(s_fast); - if (PyList_Append(rval, open_array)) - goto bail; - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + newline_indent - buf += newline_indent - */ - } - for (i = 0; i < num_items; i++) { - PyObject *obj = seq_items[i]; - if (i) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - if (encoder_listencode_obj(s, rval, obj, indent_level)) - goto bail; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_array)) - goto bail; - Py_DECREF(s_fast); - return 0; - -bail: - Py_XDECREF(ident); - Py_DECREF(s_fast); - return -1; -} - -static void -encoder_dealloc(PyObject *self) -{ 
- /* Deallocate Encoder */ - encoder_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -encoder_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_VISIT(s->markers); - Py_VISIT(s->defaultfn); - Py_VISIT(s->encoder); - Py_VISIT(s->indent); - Py_VISIT(s->key_separator); - Py_VISIT(s->item_separator); - Py_VISIT(s->sort_keys); - Py_VISIT(s->skipkeys); - return 0; -} - -static int -encoder_clear(PyObject *self) -{ - /* Deallocate Encoder */ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_CLEAR(s->markers); - Py_CLEAR(s->defaultfn); - Py_CLEAR(s->encoder); - Py_CLEAR(s->indent); - Py_CLEAR(s->key_separator); - Py_CLEAR(s->item_separator); - Py_CLEAR(s->sort_keys); - Py_CLEAR(s->skipkeys); - return 0; -} - -PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); - -static -PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Encoder", /* tp_name */ - sizeof(PyEncoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - encoder_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - encoder_call, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - encoder_doc, /* tp_doc */ - encoder_traverse, /* tp_traverse */ - encoder_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - encoder_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - encoder_init, /* tp_init */ - 0, /* tp_alloc */ 
- encoder_new, /* tp_new */ - 0, /* tp_free */ -}; - -static PyMethodDef speedups_methods[] = { - {"encode_basestring_ascii", - (PyCFunction)py_encode_basestring_ascii, - METH_O, - pydoc_encode_basestring_ascii}, - {"scanstring", - (PyCFunction)py_scanstring, - METH_VARARGS, - pydoc_scanstring}, - {NULL, NULL, 0, NULL} -}; - -PyDoc_STRVAR(module_doc, -"simplejson speedups\n"); - -void -init_speedups(void) -{ - PyObject *m; - PyScannerType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyScannerType) < 0) - return; - PyEncoderType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyEncoderType) < 0) - return; - m = Py_InitModule3("_speedups", speedups_methods, module_doc); - Py_INCREF((PyObject*)&PyScannerType); - PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); - Py_INCREF((PyObject*)&PyEncoderType); - PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); -} diff --git a/framework/pym/simplejson/compat.py b/framework/pym/simplejson/compat.py new file mode 100644 index 0000000000..5fc1412844 --- /dev/null +++ b/framework/pym/simplejson/compat.py @@ -0,0 +1,34 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + BytesIO = StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + def b(s): + return bytes(s, 'latin1') + from io import StringIO, BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + unichr = chr + +long_type = integer_types[-1] diff --git a/framework/pym/simplejson/decoder.py b/framework/pym/simplejson/decoder.py index b769ea486c..7f0b0568fd 100644 --- 
a/framework/pym/simplejson/decoder.py +++ b/framework/pym/simplejson/decoder.py @@ -1,53 +1,37 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct - -from simplejson.scanner import make_scanner -try: - from simplejson._speedups import scanstring as c_scanstring -except ImportError: - c_scanstring = None - +from .compat import PY3, unichr +from .scanner import make_scanner, JSONDecodeError + +def _import_c_scanstring(): + try: + from ._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() + +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') - if sys.byteorder != 'big': - _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] - nan, inf = struct.unpack('dd', _BYTES) + if sys.version_info < (2, 6): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + nan, inf = struct.unpack('>dd', _BYTES) + else: + nan = float('nan') + inf = float('inf') return nan, inf, -inf NaN, PosInf, NegInf = _floatconstants() - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d 
column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -62,13 +46,15 @@ def errmsg(msg, doc, pos, end=None): DEFAULT_ENCODING = "utf-8" -def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError on attempt to decode an invalid string. If strict is False then literal control characters are allowed in the string. - + Returns a tuple of the decoded string and the index of the character in s after the end quote.""" if encoding is None: @@ -79,13 +65,13 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU while 1: chunk = _m(s, end) if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): + if not _PY3 and not isinstance(content, unicode): content = unicode(content, encoding) _append(content) # Terminator is the end of string, a literal control character, @@ -94,49 +80,57 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU break elif terminator != '\\': if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid control character %r at" + raise JSONDecodeError(msg, s, end) else: _append(terminator) continue try: esc = s[end] except 
IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: " + repr(esc) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid \\X escape sequence %r" + raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" esc = s[end + 1:end + 5] - next_end = end + 5 - if len(esc) != 4: - msg = "Invalid \\uXXXX escape" - raise ValueError(errmsg(msg, s, end)) - uni = int(esc, 16) + escX = esc[1:2] + if len(esc) != 4 or escX == 'x' or escX == 'X': + raise JSONDecodeError(msg, s, end - 1) + try: + uni = int(esc, 16) + except ValueError: + raise JSONDecodeError(msg, s, end - 1) + end += 5 # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise ValueError(errmsg(msg, s, end)) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise ValueError(errmsg(msg, s, end)) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 + # Note that this will join high/low surrogate pairs + # but will also pass unpaired surrogates through + if (_maxunicode > 65535 and + uni & 0xfc00 == 0xd800 and + s[end:end + 2] == '\\u'): + esc2 = s[end + 2:end + 6] + escX = esc2[1:2] + if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 == 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end += 6 char = unichr(uni) - end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -145,8 +139,15 @@ def 
py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - pairs = {} +def JSONObject(state, encoding, strict, scan_once, object_hook, + object_pairs_hook, memo=None, + _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state + # Backwards compatibility + if memo is None: + memo = {} + memo_get = memo.setdefault + pairs = [] # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -157,19 +158,28 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE nextchar = s[end:end + 1] # Trivial empty object if nextchar == '}': + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + 1 + pairs = {} + if object_hook is not None: + pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end) end += 1 while True: key, end = scanstring(s, end, encoding, strict) + key = memo_get(key, key) # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". 
if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) + raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 @@ -181,11 +191,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE except IndexError: pass - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - pairs[key] = value + value, end = scan_once(s, end) + pairs.append((key, value)) try: nextchar = s[end] @@ -199,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE if nextchar == '}': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) try: nextchar = s[end] @@ -214,13 +221,20 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE end += 1 if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end - 1)) - + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end - 1) + + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) if object_hook is not None: pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -229,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) _append = values.append while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) + value, 
end = scan_once(s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -244,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end)) + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) try: if s[end] in _ws: @@ -268,7 +281,7 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | unicode | + | string | str, unicode | +---------------+-------------------+ | number (int) | int, long | +---------------+-------------------+ @@ -287,37 +300,56 @@ class JSONDecoder(object): """ def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True): - """``encoding`` determines the encoding used to interpret any ``str`` - objects decoded by this instance (utf-8 by default). It has no - effect when decoding ``unicode`` objects. + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """ + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as ``unicode``. + strings of other encodings should be passed in as :class:`unicode`. - ``object_hook``, if specified, will be called with the result - of every JSON object decoded and its return value will be used in - place of the given ``dict``. This can be used to provide custom + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). 
- ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. 
+ + *strict* controls the parser's behavior when it encounters an + invalid control character in a string. The default setting of + ``True`` means that unescaped control characters are parse errors, if + ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ @@ -325,30 +357,44 @@ def __init__(self, encoding=None, object_hook=None, parse_float=None, self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring + self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + if _PY3 and isinstance(s, bytes): + s = str(s, self.encoding) + obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) + raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning - with a JSON document) and return a 2-tuple of the Python + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. + Optionally, ``idx`` can be used to specify an offset in ``s`` where + the JSON document begins. This can be used to decode a JSON document from a string that may have extraneous data at the end. 
""" - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise ValueError("No JSON object could be decoded") - return obj, end + if idx < 0: + # Ensure that raw_decode bails on negative indexes, the regex + # would otherwise mask this behavior. #98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, str): + raise TypeError("Input string must be text, not bytes") + # strip UTF-8 bom + if len(s) > idx: + ord0 = ord(s[idx]) + if ord0 == 0xfeff: + idx += 1 + elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': + idx += 3 + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/framework/pym/simplejson/encoder.py b/framework/pym/simplejson/encoder.py index cf58290366..7ea172e7d2 100644 --- a/framework/pym/simplejson/encoder.py +++ b/framework/pym/simplejson/encoder.py @@ -1,17 +1,23 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re - -try: - from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from simplejson._speedups import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None - -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +from operator import itemgetter +# Do not import Decimal directly to avoid reload issues +import decimal +from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3 +def _import_speedups(): + try: + from . 
import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() + +from .decoder import PosInf +from .raw_json import RawJSON + +ESCAPE = re.compile(r'[\x00-\x1f\\"]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -27,25 +33,57 @@ #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -# Assume this produces an infinity on all machines (probably not guaranteed) -INFINITY = float('1e66666') FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u'"'): """Return a JSON representation of a Python string """ + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] def replace(match): return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in 
(str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] def replace(match): s = match.group(0) try: @@ -65,7 +103,8 @@ def replace(match): return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' -encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) class JSONEncoder(object): """Extensible JSON encoder for Python data structures. @@ -75,7 +114,7 @@ class JSONEncoder(object): +-------------------+---------------+ | Python | JSON | +===================+===============+ - | dict | object | + | dict, namedtuple | object | +-------------------+---------------+ | list, tuple | array | +-------------------+---------------+ @@ -98,9 +137,14 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None, iterable_as_array=False): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -125,14 +169,17 @@ def __init__(self, skipkeys=False, ensure_ascii=True, sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. - If indent is a non-negative integer, then JSON array - elements and object members will be pretty-printed with that - indent level. 
An indent level of 0 will only insert newlines. - None is the most compact representation. + If indent is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable @@ -142,6 +189,41 @@ def __init__(self, skipkeys=False, ensure_ascii=True, transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + If use_decimal is true (default: ``True``), ``decimal.Decimal`` will + be supported directly by the encoder. For the inverse, decode JSON + with ``parse_float=decimal.Decimal``. + + If namedtuple_as_object is true (the default), objects with + ``_asdict()`` methods will be encoded as JSON objects. + + If tuple_as_array is true (the default), tuple (and subclasses) will + be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. 
This is to avoid the + rounding that happens in Javascript otherwise. + + If int_as_string_bitcount is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, item_sort_key is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. + + If for_json is true (not the default), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized + as ``null`` in compliance with the ECMA-262 specification. If true, + this will override *allow_nan*. + """ self.skipkeys = skipkeys @@ -149,9 +231,22 @@ def __init__(self, skipkeys=False, ensure_ascii=True, self.check_circular = check_circular self.allow_nan = allow_nan self.sort_keys = sort_keys + self.use_decimal = use_decimal + self.namedtuple_as_object = namedtuple_as_object + self.tuple_as_array = tuple_as_array + self.iterable_as_array = iterable_as_array + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' if default is not None: self.default = default self.encoding = encoding @@ -174,22 +269,23 @@ def default(self, o): return JSONEncoder.default(self, o) """ - raise TypeError(repr(o) + " is not JSON serializable") + raise TypeError('Object of type %s is not JSON serializable' % + o.__class__.__name__) def encode(self, o): """Return a JSON string representation of a 
Python data structure. + >>> from simplejson import JSONEncoder >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' """ # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = text_type(o, _encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -200,7 +296,10 @@ def encode(self, o): chunks = self.iterencode(o, _one_shot=True) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) - return ''.join(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string @@ -220,15 +319,17 @@ def iterencode(self, o, _one_shot=False): _encoder = encode_basestring_ascii else: _encoder = encode_basestring - if self.encoding != 'utf-8': + if self.encoding != 'utf-8' and self.encoding is not None: def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): - o = o.decode(_encoding) + if isinstance(o, binary_type): + o = text_type(o, _encoding) return _orig_encoder(o) - def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, + _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. 
if o != o: text = 'NaN' @@ -237,44 +338,135 @@ def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _negi elif o == _neginf: text = '-Infinity' else: + if type(o) != float: + # See #118, do not trust custom str/repr + o = float(o) return _repr(o) - if not allow_nan: + if ignore_nan: + text = 'null' + elif not allow_nan: raise ValueError( "Out of range float values are not JSON compliant: " + repr(o)) return text - - if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + key_memo = {} + int_as_string_bitcount = ( + 53 if self.bigint_as_string else self.int_as_string_bitcount) + if (_one_shot and c_make_encoder is not None + and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) + self.skipkeys, self.allow_nan, key_memo, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, decimal.Decimal, self.iterable_as_array) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) + self.skipkeys, _one_shot, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.iterable_as_array, Decimal=decimal.Decimal) + try: + return _iterencode(o, 0) + finally: + key_memo.clear() + + +class JSONEncoderForHTML(JSONEncoder): + """An encoder that produces JSON safe to embed in HTML. -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + To embed JSON content in, say, a script tag on a web page, the + characters &, < and > should be escaped. 
They cannot be escaped + with the usual entities (e.g. &) because they are not expanded + within - """ % ( self.OutputString(attrs).replace('"',r'\"'), ) - # end js_output() - - def OutputString(self, attrs=None): - # Build up our result - # - result = [] - RA = result.append - - # First, the key=value pair - RA("%s=%s" % (self.key, self.coded_value)) - - # Now add any defined attributes - if attrs is None: - attrs = self._reserved - items = self.items() - items.sort() - for K,V in items: - if V == "": continue - if K not in attrs: continue - if K == "expires" and type(V) == type(1): - RA("%s=%s" % (self._reserved[K], _getdate(V))) - elif K == "max-age" and type(V) == type(1): - RA("%s=%d" % (self._reserved[K], V)) - elif K == "secure": - RA(str(self._reserved[K])) - elif K == "httponly": - RA(str(self._reserved[K])) - else: - RA("%s=%s" % (self._reserved[K], V)) - - # Return the result - return _semispacejoin(result) - # end OutputString -# end Morsel class - - - -# -# Pattern for finding cookie -# -# This used to be strict parsing based on the RFC2109 and RFC2068 -# specifications. I have since discovered that MSIE 3.0x doesn't -# follow the character rules outlined in those specs. As a -# result, the parsing rules here are less strict. -# - -_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" -_LegalValueChars = _LegalKeyChars + r"\[\]" -_CookiePattern = re.compile( - r"(?x)" # This is a Verbose pattern - r"\s*" # Optional whitespace at start of cookie - r"(?P" # Start of group 'key' - "["+ _LegalKeyChars +"]+?" # Any word of at least one letter, nongreedy - r")" # End of group 'key' - r"(" # Optional group: there may not be a value. - r"\s*=\s*" # Equal Sign - r"(?P" # Start of group 'val' - r'"(?:[^\\"]|\\.)*"' # Any doublequoted string - r"|" # or - r"\w{3},\s[\s\w\d-]{9,11}\s[\d:]{8}\sGMT" # Special case for "expires" attr - r"|" # or - "["+ _LegalValueChars +"]*" # Any word or empty string - r")" # End of group 'val' - r")?" 
# End of optional value group - r"\s*" # Any number of spaces. - r"(\s+|;|$)" # Ending either at space, semicolon, or EOS. - ) - - -# At long last, here is the cookie class. -# Using this class is almost just like using a dictionary. -# See this module's docstring for example usage. -# -class BaseCookie(dict): - # A container class for a set of Morsels - # - - def value_decode(self, val): - """real_value, coded_value = value_decode(STRING) - Called prior to setting a cookie's value from the network - representation. The VALUE is the value read from HTTP - header. - Override this function to modify the behavior of cookies. - """ - return val, val - # end value_encode - - def value_encode(self, val): - """real_value, coded_value = value_encode(VALUE) - Called prior to setting a cookie's value from the dictionary - representation. The VALUE is the value being assigned. - Override this function to modify the behavior of cookies. - """ - strval = str(val) - return strval, strval - # end value_encode - - def __init__(self, input=None): - if input: self.load(input) - # end __init__ - - def __set(self, key, real_value, coded_value): - """Private method for setting a cookie's value""" - M = self.get(key, Morsel()) - M.set(key, real_value, coded_value) - dict.__setitem__(self, key, M) - # end __set - - def __setitem__(self, key, value): - """Dictionary style assignment.""" - if isinstance(value, Morsel): - # allow assignment of constructed Morsels (e.g. 
for pickling) - dict.__setitem__(self, key, value) - else: - rval, cval = self.value_encode(value) - self.__set(key, rval, cval) - # end __setitem__ - - def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): - """Return a string suitable for HTTP.""" - result = [] - items = self.items() - items.sort() - for K,V in items: - result.append( V.output(attrs, header) ) - return sep.join(result) - # end output - - __str__ = output - - def __repr__(self): - L = [] - items = self.items() - items.sort() - for K,V in items: - L.append( '%s=%s' % (K,repr(V.value) ) ) - return '<%s: %s>' % (self.__class__.__name__, _spacejoin(L)) - - def js_output(self, attrs=None): - """Return a string suitable for JavaScript.""" - result = [] - items = self.items() - items.sort() - for K,V in items: - result.append( V.js_output(attrs) ) - return _nulljoin(result) - # end js_output - - def load(self, rawdata): - """Load cookies from a string (presumably HTTP_COOKIE) or - from a dictionary. Loading cookies from a dictionary 'd' - is equivalent to calling: - map(Cookie.__setitem__, d.keys(), d.values()) - """ - if type(rawdata) == type(""): - self.__ParseString(rawdata) - else: - # self.update() wouldn't call our custom __setitem__ - for k, v in rawdata.items(): - self[k] = v - return - # end load() - - def __ParseString(self, str, patt=_CookiePattern): - i = 0 # Our starting point - n = len(str) # Length of string - M = None # current morsel - - while 0 <= i < n: - # Start looking for a cookie - match = patt.match(str, i) - if not match: break # No more cookies - - K,V = match.group("key"), match.group("val") - i = match.end(0) - - # Parse the key, value in case it's metainfo - if K[0] == "$": - # We ignore attributes which pertain to the cookie - # mechanism as a whole. See RFC 2109. - # (Does anyone care?) 
- if M: - M[ K[1:] ] = V - elif K.lower() in Morsel._reserved: - if M: - if V is None: - if K.lower() in Morsel._flags: - M[K] = True - else: - M[K] = _unquote(V) - elif V is not None: - rval, cval = self.value_decode(V) - self.__set(K, rval, cval) - M = self[K] - # end __ParseString -# end BaseCookie class - -class SimpleCookie(BaseCookie): - """SimpleCookie - SimpleCookie supports strings as cookie values. When setting - the value using the dictionary assignment notation, SimpleCookie - calls the builtin str() to convert the value to a string. Values - received from HTTP are kept as strings. - """ - def value_decode(self, val): - return _unquote( val ), val - def value_encode(self, val): - strval = str(val) - return strval, _quote( strval ) -# end SimpleCookie - -class SerialCookie(BaseCookie): - """SerialCookie - SerialCookie supports arbitrary objects as cookie values. All - values are serialized (using cPickle) before being sent to the - client. All incoming values are assumed to be valid Pickle - representations. IF AN INCOMING VALUE IS NOT IN A VALID PICKLE - FORMAT, THEN AN EXCEPTION WILL BE RAISED. - - Note: Large cookie values add overhead because they must be - retransmitted on every HTTP transaction. - - Note: HTTP has a 2k limit on the size of a cookie. This class - does not check for this limit, so be careful!!! - """ - def __init__(self, input=None): - warnings.warn("SerialCookie class is insecure; do not use it", - DeprecationWarning) - BaseCookie.__init__(self, input) - # end __init__ - def value_decode(self, val): - # This could raise an exception! - return loads( _unquote(val) ), val - def value_encode(self, val): - return val, _quote( dumps(val) ) -# end SerialCookie - -class SmartCookie(BaseCookie): - """SmartCookie - SmartCookie supports arbitrary objects as cookie values. If the - object is a string, then it is quoted. 
If the object is not a - string, however, then SmartCookie will use cPickle to serialize - the object into a string representation. - - Note: Large cookie values add overhead because they must be - retransmitted on every HTTP transaction. - - Note: HTTP has a 2k limit on the size of a cookie. This class - does not check for this limit, so be careful!!! - """ - def __init__(self, input=None): - warnings.warn("Cookie/SmartCookie class is insecure; do not use it", - DeprecationWarning) - BaseCookie.__init__(self, input) - # end __init__ - def value_decode(self, val): - strval = _unquote(val) - try: - return loads(strval), val - except: - return strval, val - def value_encode(self, val): - if type(val) == type(""): - return val, _quote(val) - else: - return val, _quote( dumps(val) ) -# end SmartCookie - - -########################################################### -# Backwards Compatibility: Don't break any existing code! - -# We provide Cookie() as an alias for SmartCookie() -Cookie = SmartCookie - -# -########################################################### - -def _test(): - import doctest, Cookie - return doctest.testmod(Cookie) - -if __name__ == "__main__": - _test() - - -#Local Variables: -#tab-width: 4 -#end: diff --git a/python/Lib/DocXMLRPCServer.py b/python/Lib/DocXMLRPCServer.py deleted file mode 100755 index 4064ec2e48..0000000000 --- a/python/Lib/DocXMLRPCServer.py +++ /dev/null @@ -1,279 +0,0 @@ -"""Self documenting XML-RPC Server. - -This module can be used to create XML-RPC servers that -serve pydoc-style documentation in response to HTTP -GET requests. This documentation is dynamically generated -based on the functions and methods registered with the -server. - -This module is built upon the pydoc and SimpleXMLRPCServer -modules. 
-""" - -import pydoc -import inspect -import re -import sys - -from SimpleXMLRPCServer import (SimpleXMLRPCServer, - SimpleXMLRPCRequestHandler, - CGIXMLRPCRequestHandler, - resolve_dotted_attribute) - -class ServerHTMLDoc(pydoc.HTMLDoc): - """Class used to generate pydoc HTML document for a server""" - - def markup(self, text, escape=None, funcs={}, classes={}, methods={}): - """Mark up some plain text, given a context of symbols to look for. - Each context dictionary maps object names to anchor names.""" - escape = escape or self.escape - results = [] - here = 0 - - # XXX Note that this regular expression does not allow for the - # hyperlinking of arbitrary strings being used as method - # names. Only methods with names consisting of word characters - # and '.'s are hyperlinked. - pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|' - r'RFC[- ]?(\d+)|' - r'PEP[- ]?(\d+)|' - r'(self\.)?((?:\w|\.)+))\b') - while 1: - match = pattern.search(text, here) - if not match: break - start, end = match.span() - results.append(escape(text[here:start])) - - all, scheme, rfc, pep, selfdot, name = match.groups() - if scheme: - url = escape(all).replace('"', '"') - results.append('%s' % (url, url)) - elif rfc: - url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc) - results.append('%s' % (url, escape(all))) - elif pep: - url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep) - results.append('%s' % (url, escape(all))) - elif text[end:end+1] == '(': - results.append(self.namelink(name, methods, funcs, classes)) - elif selfdot: - results.append('self.%s' % name) - else: - results.append(self.namelink(name, classes)) - here = end - results.append(escape(text[here:])) - return ''.join(results) - - def docroutine(self, object, name, mod=None, - funcs={}, classes={}, methods={}, cl=None): - """Produce HTML documentation for a function or method object.""" - - anchor = (cl and cl.__name__ or '') + '-' + name - note = '' - - title = '%s' % ( - self.escape(anchor), 
self.escape(name)) - - if inspect.ismethod(object): - args, varargs, varkw, defaults = inspect.getargspec(object.im_func) - # exclude the argument bound to the instance, it will be - # confusing to the non-Python user - argspec = inspect.formatargspec ( - args[1:], - varargs, - varkw, - defaults, - formatvalue=self.formatvalue - ) - elif inspect.isfunction(object): - args, varargs, varkw, defaults = inspect.getargspec(object) - argspec = inspect.formatargspec( - args, varargs, varkw, defaults, formatvalue=self.formatvalue) - else: - argspec = '(...)' - - if isinstance(object, tuple): - argspec = object[0] or argspec - docstring = object[1] or "" - else: - docstring = pydoc.getdoc(object) - - decl = title + argspec + (note and self.grey( - '%s' % note)) - - doc = self.markup( - docstring, self.preformat, funcs, classes, methods) - doc = doc and '
%s
' % doc - return '
%s
%s
\n' % (decl, doc) - - def docserver(self, server_name, package_documentation, methods): - """Produce HTML documentation for an XML-RPC server.""" - - fdict = {} - for key, value in methods.items(): - fdict[key] = '#-' + key - fdict[value] = fdict[key] - - server_name = self.escape(server_name) - head = '%s' % server_name - result = self.heading(head, '#ffffff', '#7799ee') - - doc = self.markup(package_documentation, self.preformat, fdict) - doc = doc and '%s' % doc - result = result + '

%s

\n' % doc - - contents = [] - method_items = sorted(methods.items()) - for key, value in method_items: - contents.append(self.docroutine(value, key, funcs=fdict)) - result = result + self.bigsection( - 'Methods', '#ffffff', '#eeaa77', pydoc.join(contents)) - - return result - -class XMLRPCDocGenerator: - """Generates documentation for an XML-RPC server. - - This class is designed as mix-in and should not - be constructed directly. - """ - - def __init__(self): - # setup variables used for HTML documentation - self.server_name = 'XML-RPC Server Documentation' - self.server_documentation = \ - "This server exports the following methods through the XML-RPC "\ - "protocol." - self.server_title = 'XML-RPC Server Documentation' - - def set_server_title(self, server_title): - """Set the HTML title of the generated server documentation""" - - self.server_title = server_title - - def set_server_name(self, server_name): - """Set the name of the generated HTML server documentation""" - - self.server_name = server_name - - def set_server_documentation(self, server_documentation): - """Set the documentation string for the entire server.""" - - self.server_documentation = server_documentation - - def generate_html_documentation(self): - """generate_html_documentation() => html documentation for the server - - Generates HTML documentation for the server using introspection for - installed functions and instances that do not implement the - _dispatch method. 
Alternatively, instances can choose to implement - the _get_method_argstring(method_name) method to provide the - argument string used in the documentation and the - _methodHelp(method_name) method to provide the help text used - in the documentation.""" - - methods = {} - - for method_name in self.system_listMethods(): - if method_name in self.funcs: - method = self.funcs[method_name] - elif self.instance is not None: - method_info = [None, None] # argspec, documentation - if hasattr(self.instance, '_get_method_argstring'): - method_info[0] = self.instance._get_method_argstring(method_name) - if hasattr(self.instance, '_methodHelp'): - method_info[1] = self.instance._methodHelp(method_name) - - method_info = tuple(method_info) - if method_info != (None, None): - method = method_info - elif not hasattr(self.instance, '_dispatch'): - try: - method = resolve_dotted_attribute( - self.instance, - method_name - ) - except AttributeError: - method = method_info - else: - method = method_info - else: - assert 0, "Could not find method in self.functions and no "\ - "instance installed" - - methods[method_name] = method - - documenter = ServerHTMLDoc() - documentation = documenter.docserver( - self.server_name, - self.server_documentation, - methods - ) - - return documenter.page(self.server_title, documentation) - -class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler): - """XML-RPC and documentation request handler class. - - Handles all HTTP POST requests and attempts to decode them as - XML-RPC requests. - - Handles all HTTP GET requests and interprets them as requests - for documentation. - """ - - def do_GET(self): - """Handles the HTTP GET request. - - Interpret all HTTP GET requests as requests for server - documentation. 
- """ - # Check that the path is legal - if not self.is_rpc_path_valid(): - self.report_404() - return - - response = self.server.generate_html_documentation() - self.send_response(200) - self.send_header("Content-type", "text/html") - self.send_header("Content-length", str(len(response))) - self.end_headers() - self.wfile.write(response) - -class DocXMLRPCServer( SimpleXMLRPCServer, - XMLRPCDocGenerator): - """XML-RPC and HTML documentation server. - - Adds the ability to serve server documentation to the capabilities - of SimpleXMLRPCServer. - """ - - def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler, - logRequests=1, allow_none=False, encoding=None, - bind_and_activate=True): - SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, - allow_none, encoding, bind_and_activate) - XMLRPCDocGenerator.__init__(self) - -class DocCGIXMLRPCRequestHandler( CGIXMLRPCRequestHandler, - XMLRPCDocGenerator): - """Handler for XML-RPC data and documentation requests passed through - CGI""" - - def handle_get(self): - """Handles the HTTP GET request. - - Interpret all HTTP GET requests as requests for server - documentation. - """ - - response = self.generate_html_documentation() - - print 'Content-Type: text/html' - print 'Content-Length: %d' % len(response) - print - sys.stdout.write(response) - - def __init__(self): - CGIXMLRPCRequestHandler.__init__(self) - XMLRPCDocGenerator.__init__(self) diff --git a/python/Lib/HTMLParser.py b/python/Lib/HTMLParser.py deleted file mode 100755 index 3f97830a9a..0000000000 --- a/python/Lib/HTMLParser.py +++ /dev/null @@ -1,475 +0,0 @@ -"""A parser for HTML and XHTML.""" - -# This file is based on sgmllib.py, but the API is slightly different. - -# XXX There should be a way to distinguish between PCDATA (parsed -# character data -- the normal case), RCDATA (replaceable character -# data -- only char and entity references and end tags are special) -# and CDATA (character data -- only end tags are special). 
- - -import markupbase -import re - -# Regular expressions used for parsing - -interesting_normal = re.compile('[&<]') -incomplete = re.compile('&[a-zA-Z#]') - -entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') -charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') - -starttagopen = re.compile('<[a-zA-Z]') -piclose = re.compile('>') -commentclose = re.compile(r'--\s*>') - -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -# note: if you change tagfind/attrfind remember to update locatestarttagend too -tagfind = re.compile('([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*') -# this regex is currently unused, but left for backward compatibility -tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*') - -attrfind = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') - -locatestarttagend = re.compile(r""" - <[a-zA-Z][^\t\n\r\f />\x00]* # tag name - (?:[\s/]* # optional whitespace before attribute name - (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name - (?:\s*=+\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |"[^"]*" # LIT-enclosed value - |(?!['"])[^>\s]* # bare value - ) - )?(?:\s|/(?!>))* - )* - )? - \s* # trailing whitespace -""", re.VERBOSE) -endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# ') - - -class HTMLParseError(Exception): - """Exception raised for all parse errors.""" - - def __init__(self, msg, position=(None, None)): - assert msg - self.msg = msg - self.lineno = position[0] - self.offset = position[1] - - def __str__(self): - result = self.msg - if self.lineno is not None: - result = result + ", at line %d" % self.lineno - if self.offset is not None: - result = result + ", column %d" % (self.offset + 1) - return result - - -class HTMLParser(markupbase.ParserBase): - """Find tags and other markup and call handler functions. 
- - Usage: - p = HTMLParser() - p.feed(data) - ... - p.close() - - Start tags are handled by calling self.handle_starttag() or - self.handle_startendtag(); end tags by self.handle_endtag(). The - data between tags is passed from the parser to the derived class - by calling self.handle_data() with the data as argument (the data - may be split up in arbitrary chunks). Entity references are - passed by calling self.handle_entityref() with the entity - reference as the argument. Numeric character references are - passed to self.handle_charref() with the string containing the - reference as the argument. - """ - - CDATA_CONTENT_ELEMENTS = ("script", "style") - - - def __init__(self): - """Initialize and reset this instance.""" - self.reset() - - def reset(self): - """Reset this instance. Loses all unprocessed data.""" - self.rawdata = '' - self.lasttag = '???' - self.interesting = interesting_normal - self.cdata_elem = None - markupbase.ParserBase.reset(self) - - def feed(self, data): - r"""Feed data to the parser. - - Call this as often as you want, with as little or as much text - as you want (may include '\n'). - """ - self.rawdata = self.rawdata + data - self.goahead(0) - - def close(self): - """Handle any buffered data.""" - self.goahead(1) - - def error(self, message): - raise HTMLParseError(message, self.getpos()) - - __starttag_text = None - - def get_starttag_text(self): - """Return full source of start tag: '<...>'.""" - return self.__starttag_text - - def set_cdata_mode(self, elem): - self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) - - def clear_cdata_mode(self): - self.interesting = interesting_normal - self.cdata_elem = None - - # Internal -- handle data as far as reasonable. May leave state - # and data to be processed by a subsequent call. If 'end' is - # true, force handling all data as if followed by EOF marker. 
- def goahead(self, end): - rawdata = self.rawdata - i = 0 - n = len(rawdata) - while i < n: - match = self.interesting.search(rawdata, i) # < or & - if match: - j = match.start() - else: - if self.cdata_elem: - break - j = n - if i < j: self.handle_data(rawdata[i:j]) - i = self.updatepos(i, j) - if i == n: break - startswith = rawdata.startswith - if startswith('<', i): - if starttagopen.match(rawdata, i): # < + letter - k = self.parse_starttag(i) - elif startswith("', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 - else: - k += 1 - self.handle_data(rawdata[i:k]) - i = self.updatepos(i, k) - elif startswith("&#", i): - match = charref.match(rawdata, i) - if match: - name = match.group()[2:-1] - self.handle_charref(name) - k = match.end() - if not startswith(';', k-1): - k = k - 1 - i = self.updatepos(i, k) - continue - else: - if ";" in rawdata[i:]: # bail by consuming '&#' - self.handle_data(rawdata[i:i+2]) - i = self.updatepos(i, i+2) - break - elif startswith('&', i): - match = entityref.match(rawdata, i) - if match: - name = match.group(1) - self.handle_entityref(name) - k = match.end() - if not startswith(';', k-1): - k = k - 1 - i = self.updatepos(i, k) - continue - match = incomplete.match(rawdata, i) - if match: - # match.group() will contain at least 2 chars - if end and match.group() == rawdata[i:]: - self.error("EOF in middle of entity or char ref") - # incomplete - break - elif (i + 1) < n: - # not the end of the buffer, and can't be confused - # with some other construct - self.handle_data("&") - i = self.updatepos(i, i + 1) - else: - break - else: - assert 0, "interesting.search() lied" - # end while - if end and i < n and not self.cdata_elem: - self.handle_data(rawdata[i:n]) - i = self.updatepos(i, n) - self.rawdata = rawdata[i:] - - # Internal -- parse html declarations, return length or -1 if not terminated - # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state - # See also parse_declaration in 
_markupbase - def parse_html_declaration(self, i): - rawdata = self.rawdata - if rawdata[i:i+2] != ' - gtpos = rawdata.find('>', i+9) - if gtpos == -1: - return -1 - self.handle_decl(rawdata[i+2:gtpos]) - return gtpos+1 - else: - return self.parse_bogus_comment(i) - - # Internal -- parse bogus comment, return length or -1 if not terminated - # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state - def parse_bogus_comment(self, i, report=1): - rawdata = self.rawdata - if rawdata[i:i+2] not in ('', i+2) - if pos == -1: - return -1 - if report: - self.handle_comment(rawdata[i+2:pos]) - return pos + 1 - - # Internal -- parse processing instr, return end or -1 if not terminated - def parse_pi(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == ' - if not match: - return -1 - j = match.start() - self.handle_pi(rawdata[i+2: j]) - j = match.end() - return j - - # Internal -- handle starttag, return end or -1 if not terminated - def parse_starttag(self, i): - self.__starttag_text = None - endpos = self.check_for_whole_start_tag(i) - if endpos < 0: - return endpos - rawdata = self.rawdata - self.__starttag_text = rawdata[i:endpos] - - # Now parse the data between i+1 and j into a tag and attrs - attrs = [] - match = tagfind.match(rawdata, i+1) - assert match, 'unexpected call to parse_starttag()' - k = match.end() - self.lasttag = tag = match.group(1).lower() - - while k < endpos: - m = attrfind.match(rawdata, k) - if not m: - break - attrname, rest, attrvalue = m.group(1, 2, 3) - if not rest: - attrvalue = None - elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ - attrvalue[:1] == '"' == attrvalue[-1:]: - attrvalue = attrvalue[1:-1] - if attrvalue: - attrvalue = self.unescape(attrvalue) - attrs.append((attrname.lower(), attrvalue)) - k = m.end() - - end = rawdata[k:endpos].strip() - if end not in (">", "/>"): - lineno, offset = self.getpos() - if "\n" in self.__starttag_text: - lineno = lineno + self.__starttag_text.count("\n") - offset = 
len(self.__starttag_text) \ - - self.__starttag_text.rfind("\n") - else: - offset = offset + len(self.__starttag_text) - self.handle_data(rawdata[i:endpos]) - return endpos - if end.endswith('/>'): - # XHTML-style empty tag: - self.handle_startendtag(tag, attrs) - else: - self.handle_starttag(tag, attrs) - if tag in self.CDATA_CONTENT_ELEMENTS: - self.set_cdata_mode(tag) - return endpos - - # Internal -- check to see if we have a complete starttag; return end - # or -1 if incomplete. - def check_for_whole_start_tag(self, i): - rawdata = self.rawdata - m = locatestarttagend.match(rawdata, i) - if m: - j = m.end() - next = rawdata[j:j+1] - if next == ">": - return j + 1 - if next == "/": - if rawdata.startswith("/>", j): - return j + 2 - if rawdata.startswith("/", j): - # buffer boundary - return -1 - # else bogus input - self.updatepos(i, j + 1) - self.error("malformed empty start tag") - if next == "": - # end of input - return -1 - if next in ("abcdefghijklmnopqrstuvwxyz=/" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): - # end of input in or before attribute value, or we have the - # '/' from a '/>' ending - return -1 - if j > i: - return j - else: - return i + 1 - raise AssertionError("we should not get here!") - - # Internal -- parse endtag, return end or -1 if incomplete - def parse_endtag(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == " - if not match: - return -1 - gtpos = match.end() - match = endtagfind.match(rawdata, i) # - if not match: - if self.cdata_elem is not None: - self.handle_data(rawdata[i:gtpos]) - return gtpos - # find the name: w3.org/TR/html5/tokenization.html#tag-name-state - namematch = tagfind.match(rawdata, i+2) - if not namematch: - # w3.org/TR/html5/tokenization.html#end-tag-open-state - if rawdata[i:i+3] == '': - return i+3 - else: - return self.parse_bogus_comment(i) - tagname = namematch.group(1).lower() - # consume and ignore other stuff between the name and the > - # Note: this is not 100% correct, since we might have things like 
- # , but looking for > after tha name should cover - # most of the cases and is much simpler - gtpos = rawdata.find('>', namematch.end()) - self.handle_endtag(tagname) - return gtpos+1 - - elem = match.group(1).lower() # script or style - if self.cdata_elem is not None: - if elem != self.cdata_elem: - self.handle_data(rawdata[i:gtpos]) - return gtpos - - self.handle_endtag(elem) - self.clear_cdata_mode() - return gtpos - - # Overridable -- finish processing of start+end tag: - def handle_startendtag(self, tag, attrs): - self.handle_starttag(tag, attrs) - self.handle_endtag(tag) - - # Overridable -- handle start tag - def handle_starttag(self, tag, attrs): - pass - - # Overridable -- handle end tag - def handle_endtag(self, tag): - pass - - # Overridable -- handle character reference - def handle_charref(self, name): - pass - - # Overridable -- handle entity reference - def handle_entityref(self, name): - pass - - # Overridable -- handle data - def handle_data(self, data): - pass - - # Overridable -- handle comment - def handle_comment(self, data): - pass - - # Overridable -- handle declaration - def handle_decl(self, decl): - pass - - # Overridable -- handle processing instruction - def handle_pi(self, data): - pass - - def unknown_decl(self, data): - pass - - # Internal -- helper to remove special character quoting - entitydefs = None - def unescape(self, s): - if '&' not in s: - return s - def replaceEntities(s): - s = s.groups()[0] - try: - if s[0] == "#": - s = s[1:] - if s[0] in ['x','X']: - c = int(s[1:], 16) - else: - c = int(s) - return unichr(c) - except ValueError: - return '&#'+s+';' - else: - # Cannot use name2codepoint directly, because HTMLParser supports apos, - # which is not part of HTML 4 - import htmlentitydefs - if HTMLParser.entitydefs is None: - entitydefs = HTMLParser.entitydefs = {'apos':u"'"} - for k, v in htmlentitydefs.name2codepoint.iteritems(): - entitydefs[k] = unichr(v) - try: - return self.entitydefs[s] - except KeyError: - return 
'&'+s+';' - - return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s) diff --git a/python/Lib/MimeWriter.py b/python/Lib/MimeWriter.py deleted file mode 100755 index e898f9ff23..0000000000 --- a/python/Lib/MimeWriter.py +++ /dev/null @@ -1,186 +0,0 @@ -"""Generic MIME writer. - -This module defines the class MimeWriter. The MimeWriter class implements -a basic formatter for creating MIME multi-part files. It doesn't seek around -the output file nor does it use large amounts of buffer space. You must write -the parts out in the order that they should occur in the final file. -MimeWriter does buffer the headers you add, allowing you to rearrange their -order. - -""" - - -import mimetools - -__all__ = ["MimeWriter"] - -import warnings - -warnings.warn("the MimeWriter module is deprecated; use the email package instead", - DeprecationWarning, 2) - -class MimeWriter: - - """Generic MIME writer. - - Methods: - - __init__() - addheader() - flushheaders() - startbody() - startmultipartbody() - nextpart() - lastpart() - - A MIME writer is much more primitive than a MIME parser. It - doesn't seek around on the output file, and it doesn't use large - amounts of buffer space, so you have to write the parts in the - order they should occur on the output file. It does buffer the - headers you add, allowing you to rearrange their order. - - General usage is: - - f = - w = MimeWriter(f) - ...call w.addheader(key, value) 0 or more times... - - followed by either: - - f = w.startbody(content_type) - ...call f.write(data) for body data... - - or: - - w.startmultipartbody(subtype) - for each part: - subwriter = w.nextpart() - ...use the subwriter's methods to create the subpart... - w.lastpart() - - The subwriter is another MimeWriter instance, and should be - treated in the same way as the toplevel MimeWriter. This way, - writing recursive body parts is easy. - - Warning: don't forget to call lastpart()! 
- - XXX There should be more state so calls made in the wrong order - are detected. - - Some special cases: - - - startbody() just returns the file passed to the constructor; - but don't use this knowledge, as it may be changed. - - - startmultipartbody() actually returns a file as well; - this can be used to write the initial 'if you can read this your - mailer is not MIME-aware' message. - - - If you call flushheaders(), the headers accumulated so far are - written out (and forgotten); this is useful if you don't need a - body part at all, e.g. for a subpart of type message/rfc822 - that's (mis)used to store some header-like information. - - - Passing a keyword argument 'prefix=' to addheader(), - start*body() affects where the header is inserted; 0 means - append at the end, 1 means insert at the start; default is - append for addheader(), but insert for start*body(), which use - it to determine where the Content-Type header goes. - - """ - - def __init__(self, fp): - self._fp = fp - self._headers = [] - - def addheader(self, key, value, prefix=0): - """Add a header line to the MIME message. - - The key is the name of the header, where the value obviously provides - the value of the header. The optional argument prefix determines - where the header is inserted; 0 means append at the end, 1 means - insert at the start. The default is to append. - - """ - lines = value.split("\n") - while lines and not lines[-1]: del lines[-1] - while lines and not lines[0]: del lines[0] - for i in range(1, len(lines)): - lines[i] = " " + lines[i].strip() - value = "\n".join(lines) + "\n" - line = key + ": " + value - if prefix: - self._headers.insert(0, line) - else: - self._headers.append(line) - - def flushheaders(self): - """Writes out and forgets all headers accumulated so far. - - This is useful if you don't need a body part at all; for example, - for a subpart of type message/rfc822 that's (mis)used to store some - header-like information. 
- - """ - self._fp.writelines(self._headers) - self._headers = [] - - def startbody(self, ctype, plist=[], prefix=1): - """Returns a file-like object for writing the body of the message. - - The content-type is set to the provided ctype, and the optional - parameter, plist, provides additional parameters for the - content-type declaration. The optional argument prefix determines - where the header is inserted; 0 means append at the end, 1 means - insert at the start. The default is to insert at the start. - - """ - for name, value in plist: - ctype = ctype + ';\n %s=\"%s\"' % (name, value) - self.addheader("Content-Type", ctype, prefix=prefix) - self.flushheaders() - self._fp.write("\n") - return self._fp - - def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1): - """Returns a file-like object for writing the body of the message. - - Additionally, this method initializes the multi-part code, where the - subtype parameter provides the multipart subtype, the boundary - parameter may provide a user-defined boundary specification, and the - plist parameter provides optional parameters for the subtype. The - optional argument, prefix, determines where the header is inserted; - 0 means append at the end, 1 means insert at the start. The default - is to insert at the start. Subparts should be created using the - nextpart() method. - - """ - self._boundary = boundary or mimetools.choose_boundary() - return self.startbody("multipart/" + subtype, - [("boundary", self._boundary)] + plist, - prefix=prefix) - - def nextpart(self): - """Returns a new instance of MimeWriter which represents an - individual part in a multipart message. - - This may be used to write the part as well as used for creating - recursively complex multipart messages. The message must first be - initialized with the startmultipartbody() method before using the - nextpart() method. 
- - """ - self._fp.write("\n--" + self._boundary + "\n") - return self.__class__(self._fp) - - def lastpart(self): - """This is used to designate the last part of a multipart message. - - It should always be used when writing multipart messages. - - """ - self._fp.write("\n--" + self._boundary + "--\n") - - -if __name__ == '__main__': - import test.test_MimeWriter diff --git a/python/Lib/Queue.py b/python/Lib/Queue.py deleted file mode 100755 index 00364b39be..0000000000 --- a/python/Lib/Queue.py +++ /dev/null @@ -1,244 +0,0 @@ -"""A multi-producer, multi-consumer queue.""" - -from time import time as _time -try: - import threading as _threading -except ImportError: - import dummy_threading as _threading -from collections import deque -import heapq - -__all__ = ['Empty', 'Full', 'Queue', 'PriorityQueue', 'LifoQueue'] - -class Empty(Exception): - "Exception raised by Queue.get(block=0)/get_nowait()." - pass - -class Full(Exception): - "Exception raised by Queue.put(block=0)/put_nowait()." - pass - -class Queue: - """Create a queue object with a given maximum size. - - If maxsize is <= 0, the queue size is infinite. - """ - def __init__(self, maxsize=0): - self.maxsize = maxsize - self._init(maxsize) - # mutex must be held whenever the queue is mutating. All methods - # that acquire mutex must release it before returning. mutex - # is shared between the three conditions, so acquiring and - # releasing the conditions also acquires and releases mutex. - self.mutex = _threading.Lock() - # Notify not_empty whenever an item is added to the queue; a - # thread waiting to get is notified then. - self.not_empty = _threading.Condition(self.mutex) - # Notify not_full whenever an item is removed from the queue; - # a thread waiting to put is notified then. 
- self.not_full = _threading.Condition(self.mutex) - # Notify all_tasks_done whenever the number of unfinished tasks - # drops to zero; thread waiting to join() is notified to resume - self.all_tasks_done = _threading.Condition(self.mutex) - self.unfinished_tasks = 0 - - def task_done(self): - """Indicate that a formerly enqueued task is complete. - - Used by Queue consumer threads. For each get() used to fetch a task, - a subsequent call to task_done() tells the queue that the processing - on the task is complete. - - If a join() is currently blocking, it will resume when all items - have been processed (meaning that a task_done() call was received - for every item that had been put() into the queue). - - Raises a ValueError if called more times than there were items - placed in the queue. - """ - self.all_tasks_done.acquire() - try: - unfinished = self.unfinished_tasks - 1 - if unfinished <= 0: - if unfinished < 0: - raise ValueError('task_done() called too many times') - self.all_tasks_done.notify_all() - self.unfinished_tasks = unfinished - finally: - self.all_tasks_done.release() - - def join(self): - """Blocks until all items in the Queue have been gotten and processed. - - The count of unfinished tasks goes up whenever an item is added to the - queue. The count goes down whenever a consumer thread calls task_done() - to indicate the item was retrieved and all work on it is complete. - - When the count of unfinished tasks drops to zero, join() unblocks. 
- """ - self.all_tasks_done.acquire() - try: - while self.unfinished_tasks: - self.all_tasks_done.wait() - finally: - self.all_tasks_done.release() - - def qsize(self): - """Return the approximate size of the queue (not reliable!).""" - self.mutex.acquire() - n = self._qsize() - self.mutex.release() - return n - - def empty(self): - """Return True if the queue is empty, False otherwise (not reliable!).""" - self.mutex.acquire() - n = not self._qsize() - self.mutex.release() - return n - - def full(self): - """Return True if the queue is full, False otherwise (not reliable!).""" - self.mutex.acquire() - n = 0 < self.maxsize == self._qsize() - self.mutex.release() - return n - - def put(self, item, block=True, timeout=None): - """Put an item into the queue. - - If optional args 'block' is true and 'timeout' is None (the default), - block if necessary until a free slot is available. If 'timeout' is - a non-negative number, it blocks at most 'timeout' seconds and raises - the Full exception if no free slot was available within that time. - Otherwise ('block' is false), put an item on the queue if a free slot - is immediately available, else raise the Full exception ('timeout' - is ignored in that case). - """ - self.not_full.acquire() - try: - if self.maxsize > 0: - if not block: - if self._qsize() == self.maxsize: - raise Full - elif timeout is None: - while self._qsize() == self.maxsize: - self.not_full.wait() - elif timeout < 0: - raise ValueError("'timeout' must be a non-negative number") - else: - endtime = _time() + timeout - while self._qsize() == self.maxsize: - remaining = endtime - _time() - if remaining <= 0.0: - raise Full - self.not_full.wait(remaining) - self._put(item) - self.unfinished_tasks += 1 - self.not_empty.notify() - finally: - self.not_full.release() - - def put_nowait(self, item): - """Put an item into the queue without blocking. - - Only enqueue the item if a free slot is immediately available. - Otherwise raise the Full exception. 
- """ - return self.put(item, False) - - def get(self, block=True, timeout=None): - """Remove and return an item from the queue. - - If optional args 'block' is true and 'timeout' is None (the default), - block if necessary until an item is available. If 'timeout' is - a non-negative number, it blocks at most 'timeout' seconds and raises - the Empty exception if no item was available within that time. - Otherwise ('block' is false), return an item if one is immediately - available, else raise the Empty exception ('timeout' is ignored - in that case). - """ - self.not_empty.acquire() - try: - if not block: - if not self._qsize(): - raise Empty - elif timeout is None: - while not self._qsize(): - self.not_empty.wait() - elif timeout < 0: - raise ValueError("'timeout' must be a non-negative number") - else: - endtime = _time() + timeout - while not self._qsize(): - remaining = endtime - _time() - if remaining <= 0.0: - raise Empty - self.not_empty.wait(remaining) - item = self._get() - self.not_full.notify() - return item - finally: - self.not_empty.release() - - def get_nowait(self): - """Remove and return an item from the queue without blocking. - - Only get an item if one is immediately available. Otherwise - raise the Empty exception. - """ - return self.get(False) - - # Override these methods to implement other queue organizations - # (e.g. stack or priority queue). - # These will only be called with appropriate locks held - - # Initialize the queue representation - def _init(self, maxsize): - self.queue = deque() - - def _qsize(self, len=len): - return len(self.queue) - - # Put a new item in the queue - def _put(self, item): - self.queue.append(item) - - # Get an item from the queue - def _get(self): - return self.queue.popleft() - - -class PriorityQueue(Queue): - '''Variant of Queue that retrieves open entries in priority order (lowest first). - - Entries are typically tuples of the form: (priority number, data). 
- ''' - - def _init(self, maxsize): - self.queue = [] - - def _qsize(self, len=len): - return len(self.queue) - - def _put(self, item, heappush=heapq.heappush): - heappush(self.queue, item) - - def _get(self, heappop=heapq.heappop): - return heappop(self.queue) - - -class LifoQueue(Queue): - '''Variant of Queue that retrieves most recently added entries first.''' - - def _init(self, maxsize): - self.queue = [] - - def _qsize(self, len=len): - return len(self.queue) - - def _put(self, item): - self.queue.append(item) - - def _get(self): - return self.queue.pop() diff --git a/python/Lib/SimpleHTTPServer.py b/python/Lib/SimpleHTTPServer.py deleted file mode 100755 index c140a273fb..0000000000 --- a/python/Lib/SimpleHTTPServer.py +++ /dev/null @@ -1,235 +0,0 @@ -"""Simple HTTP Server. - -This module builds on BaseHTTPServer by implementing the standard GET -and HEAD requests in a fairly straightforward manner. - -""" - - -__version__ = "0.6" - -__all__ = ["SimpleHTTPRequestHandler"] - -import os -import posixpath -import BaseHTTPServer -import urllib -import urlparse -import cgi -import sys -import shutil -import mimetypes -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - - -class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): - - """Simple HTTP request handler with GET and HEAD commands. - - This serves files from the current directory and any of its - subdirectories. The MIME type for files is determined by - calling the .guess_type() method. - - The GET and HEAD requests are identical except that the HEAD - request omits the actual contents of the file. 
- - """ - - server_version = "SimpleHTTP/" + __version__ - - def do_GET(self): - """Serve a GET request.""" - f = self.send_head() - if f: - try: - self.copyfile(f, self.wfile) - finally: - f.close() - - def do_HEAD(self): - """Serve a HEAD request.""" - f = self.send_head() - if f: - f.close() - - def send_head(self): - """Common code for GET and HEAD commands. - - This sends the response code and MIME headers. - - Return value is either a file object (which has to be copied - to the outputfile by the caller unless the command was HEAD, - and must be closed by the caller under all circumstances), or - None, in which case the caller has nothing further to do. - - """ - path = self.translate_path(self.path) - f = None - if os.path.isdir(path): - parts = urlparse.urlsplit(self.path) - if not parts.path.endswith('/'): - # redirect browser - doing basically what apache does - self.send_response(301) - new_parts = (parts[0], parts[1], parts[2] + '/', - parts[3], parts[4]) - new_url = urlparse.urlunsplit(new_parts) - self.send_header("Location", new_url) - self.end_headers() - return None - for index in "index.html", "index.htm": - index = os.path.join(path, index) - if os.path.exists(index): - path = index - break - else: - return self.list_directory(path) - ctype = self.guess_type(path) - try: - # Always read in binary mode. Opening files in text mode may cause - # newline translations, making the actual size of the content - # transmitted *less* than the content-length! - f = open(path, 'rb') - except IOError: - self.send_error(404, "File not found") - return None - try: - self.send_response(200) - self.send_header("Content-type", ctype) - fs = os.fstat(f.fileno()) - self.send_header("Content-Length", str(fs[6])) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.end_headers() - return f - except: - f.close() - raise - - def list_directory(self, path): - """Helper to produce a directory listing (absent index.html). 
- - Return value is either a file object, or None (indicating an - error). In either case, the headers are sent, making the - interface the same as for send_head(). - - """ - try: - list = os.listdir(path) - except os.error: - self.send_error(404, "No permission to list directory") - return None - list.sort(key=lambda a: a.lower()) - f = StringIO() - displaypath = cgi.escape(urllib.unquote(self.path)) - f.write('') - f.write("\nDirectory listing for %s\n" % displaypath) - f.write("\n

Directory listing for %s

\n" % displaypath) - f.write("
\n
    \n") - for name in list: - fullname = os.path.join(path, name) - displayname = linkname = name - # Append / for directories or @ for symbolic links - if os.path.isdir(fullname): - displayname = name + "/" - linkname = name + "/" - if os.path.islink(fullname): - displayname = name + "@" - # Note: a link to a directory displays with @ and links with / - f.write('
  • %s\n' - % (urllib.quote(linkname), cgi.escape(displayname))) - f.write("
\n
\n\n\n") - length = f.tell() - f.seek(0) - self.send_response(200) - encoding = sys.getfilesystemencoding() - self.send_header("Content-type", "text/html; charset=%s" % encoding) - self.send_header("Content-Length", str(length)) - self.end_headers() - return f - - def translate_path(self, path): - """Translate a /-separated PATH to the local filename syntax. - - Components that mean special things to the local file system - (e.g. drive or directory names) are ignored. (XXX They should - probably be diagnosed.) - - """ - # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] - # Don't forget explicit trailing slash when normalizing. Issue17324 - trailing_slash = path.rstrip().endswith('/') - path = posixpath.normpath(urllib.unquote(path)) - words = path.split('/') - words = filter(None, words) - path = os.getcwd() - for word in words: - if os.path.dirname(word) or word in (os.curdir, os.pardir): - # Ignore components that are not a simple file/directory name - continue - path = os.path.join(path, word) - if trailing_slash: - path += '/' - return path - - def copyfile(self, source, outputfile): - """Copy all data between two file objects. - - The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). - - The only reason for overriding this would be to change - the block size or perhaps to replace newlines by CRLF - -- note however that this the default server uses this - to copy binary data as well. - - """ - shutil.copyfileobj(source, outputfile) - - def guess_type(self, path): - """Guess the type of a file. - - Argument is a PATH (a filename). - - Return value is a string of the form type/subtype, - usable for a MIME Content-type header. 
- - The default implementation looks the file's extension - up in the table self.extensions_map, using application/octet-stream - as a default; however it would be permissible (if - slow) to look inside the data to make a better guess. - - """ - - base, ext = posixpath.splitext(path) - if ext in self.extensions_map: - return self.extensions_map[ext] - ext = ext.lower() - if ext in self.extensions_map: - return self.extensions_map[ext] - else: - return self.extensions_map[''] - - if not mimetypes.inited: - mimetypes.init() # try to read system mime.types - extensions_map = mimetypes.types_map.copy() - extensions_map.update({ - '': 'application/octet-stream', # Default - '.py': 'text/plain', - '.c': 'text/plain', - '.h': 'text/plain', - }) - - -def test(HandlerClass = SimpleHTTPRequestHandler, - ServerClass = BaseHTTPServer.HTTPServer): - BaseHTTPServer.test(HandlerClass, ServerClass) - - -if __name__ == '__main__': - test() diff --git a/python/Lib/SimpleXMLRPCServer.py b/python/Lib/SimpleXMLRPCServer.py deleted file mode 100755 index 329395776f..0000000000 --- a/python/Lib/SimpleXMLRPCServer.py +++ /dev/null @@ -1,708 +0,0 @@ -r"""Simple XML-RPC Server. - -This module can be used to create simple XML-RPC servers -by creating a server and either installing functions, a -class instance, or by extending the SimpleXMLRPCServer -class. - -It can also be used to handle XML-RPC requests in a CGI -environment using CGIXMLRPCRequestHandler. - -A list of possible usage patterns follows: - -1. Install functions: - -server = SimpleXMLRPCServer(("localhost", 8000)) -server.register_function(pow) -server.register_function(lambda x,y: x+y, 'add') -server.serve_forever() - -2. 
Install an instance: - -class MyFuncs: - def __init__(self): - # make all of the string functions available through - # string.func_name - import string - self.string = string - def _listMethods(self): - # implement this method so that system.listMethods - # knows to advertise the strings methods - return list_public_methods(self) + \ - ['string.' + method for method in list_public_methods(self.string)] - def pow(self, x, y): return pow(x, y) - def add(self, x, y) : return x + y - -server = SimpleXMLRPCServer(("localhost", 8000)) -server.register_introspection_functions() -server.register_instance(MyFuncs()) -server.serve_forever() - -3. Install an instance with custom dispatch method: - -class Math: - def _listMethods(self): - # this method must be present for system.listMethods - # to work - return ['add', 'pow'] - def _methodHelp(self, method): - # this method must be present for system.methodHelp - # to work - if method == 'add': - return "add(2,3) => 5" - elif method == 'pow': - return "pow(x, y[, z]) => number" - else: - # By convention, return empty - # string if no help is available - return "" - def _dispatch(self, method, params): - if method == 'pow': - return pow(*params) - elif method == 'add': - return params[0] + params[1] - else: - raise 'bad method' - -server = SimpleXMLRPCServer(("localhost", 8000)) -server.register_introspection_functions() -server.register_instance(Math()) -server.serve_forever() - -4. Subclass SimpleXMLRPCServer: - -class MathServer(SimpleXMLRPCServer): - def _dispatch(self, method, params): - try: - # We are forcing the 'export_' prefix on methods that are - # callable through XML-RPC to prevent potential security - # problems - func = getattr(self, 'export_' + method) - except AttributeError: - raise Exception('method "%s" is not supported' % method) - else: - return func(*params) - - def export_add(self, x, y): - return x + y - -server = MathServer(("localhost", 8000)) -server.serve_forever() - -5. 
CGI script: - -server = CGIXMLRPCRequestHandler() -server.register_function(pow) -server.handle_request() -""" - -# Written by Brian Quinlan (brian@sweetapp.com). -# Based on code written by Fredrik Lundh. - -import xmlrpclib -from xmlrpclib import Fault -import SocketServer -import BaseHTTPServer -import sys -import os -import traceback -import re -try: - import fcntl -except ImportError: - fcntl = None - -def resolve_dotted_attribute(obj, attr, allow_dotted_names=True): - """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d - - Resolves a dotted attribute name to an object. Raises - an AttributeError if any attribute in the chain starts with a '_'. - - If the optional allow_dotted_names argument is false, dots are not - supported and this function operates similar to getattr(obj, attr). - """ - - if allow_dotted_names: - attrs = attr.split('.') - else: - attrs = [attr] - - for i in attrs: - if i.startswith('_'): - raise AttributeError( - 'attempt to access private attribute "%s"' % i - ) - else: - obj = getattr(obj,i) - return obj - -def list_public_methods(obj): - """Returns a list of attribute strings, found in the specified - object, which represent callable attributes""" - - return [member for member in dir(obj) - if not member.startswith('_') and - hasattr(getattr(obj, member), '__call__')] - -def remove_duplicates(lst): - """remove_duplicates([2,2,2,1,3,3]) => [3,1,2] - - Returns a copy of a list without duplicates. Every list - item must be hashable and the order of the items in the - resulting list is not defined. - """ - u = {} - for x in lst: - u[x] = 1 - - return u.keys() - -class SimpleXMLRPCDispatcher: - """Mix-in class that dispatches XML-RPC requests. - - This class is used to register XML-RPC method handlers - and then to dispatch them. This class doesn't need to be - instanced directly when used by SimpleXMLRPCServer but it - can be instanced when used by the MultiPathXMLRPCServer. 
- """ - - def __init__(self, allow_none=False, encoding=None): - self.funcs = {} - self.instance = None - self.allow_none = allow_none - self.encoding = encoding - - def register_instance(self, instance, allow_dotted_names=False): - """Registers an instance to respond to XML-RPC requests. - - Only one instance can be installed at a time. - - If the registered instance has a _dispatch method then that - method will be called with the name of the XML-RPC method and - its parameters as a tuple - e.g. instance._dispatch('add',(2,3)) - - If the registered instance does not have a _dispatch method - then the instance will be searched to find a matching method - and, if found, will be called. Methods beginning with an '_' - are considered private and will not be called by - SimpleXMLRPCServer. - - If a registered function matches an XML-RPC request, then it - will be called instead of the registered instance. - - If the optional allow_dotted_names argument is true and the - instance does not have a _dispatch method, method names - containing dots are supported and resolved, as long as none of - the name segments start with an '_'. - - *** SECURITY WARNING: *** - - Enabling the allow_dotted_names options allows intruders - to access your module's global variables and may allow - intruders to execute arbitrary code on your machine. Only - use this option on a secure, closed network. - - """ - - self.instance = instance - self.allow_dotted_names = allow_dotted_names - - def register_function(self, function, name = None): - """Registers a function to respond to XML-RPC requests. - - The optional name argument can be used to set a Unicode name - for the function. - """ - - if name is None: - name = function.__name__ - self.funcs[name] = function - - def register_introspection_functions(self): - """Registers the XML-RPC introspection methods in the system - namespace. 
- - see http://xmlrpc.usefulinc.com/doc/reserved.html - """ - - self.funcs.update({'system.listMethods' : self.system_listMethods, - 'system.methodSignature' : self.system_methodSignature, - 'system.methodHelp' : self.system_methodHelp}) - - def register_multicall_functions(self): - """Registers the XML-RPC multicall method in the system - namespace. - - see http://www.xmlrpc.com/discuss/msgReader$1208""" - - self.funcs.update({'system.multicall' : self.system_multicall}) - - def _marshaled_dispatch(self, data, dispatch_method = None, path = None): - """Dispatches an XML-RPC method from marshalled (XML) data. - - XML-RPC methods are dispatched from the marshalled (XML) data - using the _dispatch method and the result is returned as - marshalled data. For backwards compatibility, a dispatch - function can be provided as an argument (see comment in - SimpleXMLRPCRequestHandler.do_POST) but overriding the - existing method through subclassing is the preferred means - of changing method dispatch behavior. 
- """ - - try: - params, method = xmlrpclib.loads(data) - - # generate response - if dispatch_method is not None: - response = dispatch_method(method, params) - else: - response = self._dispatch(method, params) - # wrap response in a singleton tuple - response = (response,) - response = xmlrpclib.dumps(response, methodresponse=1, - allow_none=self.allow_none, encoding=self.encoding) - except Fault, fault: - response = xmlrpclib.dumps(fault, allow_none=self.allow_none, - encoding=self.encoding) - except: - # report exception back to server - exc_type, exc_value, exc_tb = sys.exc_info() - response = xmlrpclib.dumps( - xmlrpclib.Fault(1, "%s:%s" % (exc_type, exc_value)), - encoding=self.encoding, allow_none=self.allow_none, - ) - - return response - - def system_listMethods(self): - """system.listMethods() => ['add', 'subtract', 'multiple'] - - Returns a list of the methods supported by the server.""" - - methods = self.funcs.keys() - if self.instance is not None: - # Instance can implement _listMethod to return a list of - # methods - if hasattr(self.instance, '_listMethods'): - methods = remove_duplicates( - methods + self.instance._listMethods() - ) - # if the instance has a _dispatch method then we - # don't have enough information to provide a list - # of methods - elif not hasattr(self.instance, '_dispatch'): - methods = remove_duplicates( - methods + list_public_methods(self.instance) - ) - methods.sort() - return methods - - def system_methodSignature(self, method_name): - """system.methodSignature('add') => [double, int, int] - - Returns a list describing the signature of the method. In the - above example, the add method takes two integers as arguments - and returns a double result. 
- - This server does NOT support system.methodSignature.""" - - # See http://xmlrpc.usefulinc.com/doc/sysmethodsig.html - - return 'signatures not supported' - - def system_methodHelp(self, method_name): - """system.methodHelp('add') => "Adds two integers together" - - Returns a string containing documentation for the specified method.""" - - method = None - if method_name in self.funcs: - method = self.funcs[method_name] - elif self.instance is not None: - # Instance can implement _methodHelp to return help for a method - if hasattr(self.instance, '_methodHelp'): - return self.instance._methodHelp(method_name) - # if the instance has a _dispatch method then we - # don't have enough information to provide help - elif not hasattr(self.instance, '_dispatch'): - try: - method = resolve_dotted_attribute( - self.instance, - method_name, - self.allow_dotted_names - ) - except AttributeError: - pass - - # Note that we aren't checking that the method actually - # be a callable object of some kind - if method is None: - return "" - else: - import pydoc - return pydoc.getdoc(method) - - def system_multicall(self, call_list): - """system.multicall([{'methodName': 'add', 'params': [2, 2]}, ...]) => \ -[[4], ...] - - Allows the caller to package multiple XML-RPC calls into a single - request. - - See http://www.xmlrpc.com/discuss/msgReader$1208 - """ - - results = [] - for call in call_list: - method_name = call['methodName'] - params = call['params'] - - try: - # XXX A marshalling error in any response will fail the entire - # multicall. If someone cares they should fix this. 
- results.append([self._dispatch(method_name, params)]) - except Fault, fault: - results.append( - {'faultCode' : fault.faultCode, - 'faultString' : fault.faultString} - ) - except: - exc_type, exc_value, exc_tb = sys.exc_info() - results.append( - {'faultCode' : 1, - 'faultString' : "%s:%s" % (exc_type, exc_value)} - ) - return results - - def _dispatch(self, method, params): - """Dispatches the XML-RPC method. - - XML-RPC calls are forwarded to a registered function that - matches the called XML-RPC method name. If no such function - exists then the call is forwarded to the registered instance, - if available. - - If the registered instance has a _dispatch method then that - method will be called with the name of the XML-RPC method and - its parameters as a tuple - e.g. instance._dispatch('add',(2,3)) - - If the registered instance does not have a _dispatch method - then the instance will be searched to find a matching method - and, if found, will be called. - - Methods beginning with an '_' are considered private and will - not be called. - """ - - func = None - try: - # check to see if a matching function has been registered - func = self.funcs[method] - except KeyError: - if self.instance is not None: - # check for a _dispatch method - if hasattr(self.instance, '_dispatch'): - return self.instance._dispatch(method, params) - else: - # call instance method directly - try: - func = resolve_dotted_attribute( - self.instance, - method, - self.allow_dotted_names - ) - except AttributeError: - pass - - if func is not None: - return func(*params) - else: - raise Exception('method "%s" is not supported' % method) - -class SimpleXMLRPCRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): - """Simple XML-RPC request handler class. - - Handles all HTTP POST requests and attempts to decode them as - XML-RPC requests. - """ - - # Class attribute listing the accessible path components; - # paths not on this list will result in a 404 error. 
- rpc_paths = ('/', '/RPC2') - - #if not None, encode responses larger than this, if possible - encode_threshold = 1400 #a common MTU - - #Override form StreamRequestHandler: full buffering of output - #and no Nagle. - wbufsize = -1 - disable_nagle_algorithm = True - - # a re to match a gzip Accept-Encoding - aepattern = re.compile(r""" - \s* ([^\s;]+) \s* #content-coding - (;\s* q \s*=\s* ([0-9\.]+))? #q - """, re.VERBOSE | re.IGNORECASE) - - def accept_encodings(self): - r = {} - ae = self.headers.get("Accept-Encoding", "") - for e in ae.split(","): - match = self.aepattern.match(e) - if match: - v = match.group(3) - v = float(v) if v else 1.0 - r[match.group(1)] = v - return r - - def is_rpc_path_valid(self): - if self.rpc_paths: - return self.path in self.rpc_paths - else: - # If .rpc_paths is empty, just assume all paths are legal - return True - - def do_POST(self): - """Handles the HTTP POST request. - - Attempts to interpret all HTTP POST requests as XML-RPC calls, - which are forwarded to the server's _dispatch method for handling. - """ - - # Check that the path is legal - if not self.is_rpc_path_valid(): - self.report_404() - return - - try: - # Get arguments by reading body of request. - # We read this in chunks to avoid straining - # socket.read(); around the 10 or 15Mb mark, some platforms - # begin to have problems (bug #792570). - max_chunk_size = 10*1024*1024 - size_remaining = int(self.headers["content-length"]) - L = [] - while size_remaining: - chunk_size = min(size_remaining, max_chunk_size) - chunk = self.rfile.read(chunk_size) - if not chunk: - break - L.append(chunk) - size_remaining -= len(L[-1]) - data = ''.join(L) - - data = self.decode_request_content(data) - if data is None: - return #response has been sent - - # In previous versions of SimpleXMLRPCServer, _dispatch - # could be overridden in this class, instead of in - # SimpleXMLRPCDispatcher. 
To maintain backwards compatibility, - # check to see if a subclass implements _dispatch and dispatch - # using that method if present. - response = self.server._marshaled_dispatch( - data, getattr(self, '_dispatch', None), self.path - ) - except Exception, e: # This should only happen if the module is buggy - # internal error, report as HTTP server error - self.send_response(500) - - # Send information about the exception if requested - if hasattr(self.server, '_send_traceback_header') and \ - self.server._send_traceback_header: - self.send_header("X-exception", str(e)) - self.send_header("X-traceback", traceback.format_exc()) - - self.send_header("Content-length", "0") - self.end_headers() - else: - # got a valid XML RPC response - self.send_response(200) - self.send_header("Content-type", "text/xml") - if self.encode_threshold is not None: - if len(response) > self.encode_threshold: - q = self.accept_encodings().get("gzip", 0) - if q: - try: - response = xmlrpclib.gzip_encode(response) - self.send_header("Content-Encoding", "gzip") - except NotImplementedError: - pass - self.send_header("Content-length", str(len(response))) - self.end_headers() - self.wfile.write(response) - - def decode_request_content(self, data): - #support gzip encoding of request - encoding = self.headers.get("content-encoding", "identity").lower() - if encoding == "identity": - return data - if encoding == "gzip": - try: - return xmlrpclib.gzip_decode(data) - except NotImplementedError: - self.send_response(501, "encoding %r not supported" % encoding) - except ValueError: - self.send_response(400, "error decoding gzip content") - else: - self.send_response(501, "encoding %r not supported" % encoding) - self.send_header("Content-length", "0") - self.end_headers() - - def report_404 (self): - # Report a 404 error - self.send_response(404) - response = 'No such page' - self.send_header("Content-type", "text/plain") - self.send_header("Content-length", str(len(response))) - self.end_headers() 
- self.wfile.write(response) - - def log_request(self, code='-', size='-'): - """Selectively log an accepted request.""" - - if self.server.logRequests: - BaseHTTPServer.BaseHTTPRequestHandler.log_request(self, code, size) - -class SimpleXMLRPCServer(SocketServer.TCPServer, - SimpleXMLRPCDispatcher): - """Simple XML-RPC server. - - Simple XML-RPC server that allows functions and a single instance - to be installed to handle requests. The default implementation - attempts to dispatch XML-RPC calls to the functions or instance - installed in the server. Override the _dispatch method inhereted - from SimpleXMLRPCDispatcher to change this behavior. - """ - - allow_reuse_address = True - - # Warning: this is for debugging purposes only! Never set this to True in - # production code, as will be sending out sensitive information (exception - # and stack trace details) when exceptions are raised inside - # SimpleXMLRPCRequestHandler.do_POST - _send_traceback_header = False - - def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, - logRequests=True, allow_none=False, encoding=None, bind_and_activate=True): - self.logRequests = logRequests - - SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding) - SocketServer.TCPServer.__init__(self, addr, requestHandler, bind_and_activate) - - # [Bug #1222790] If possible, set close-on-exec flag; if a - # method spawns a subprocess, the subprocess shouldn't have - # the listening socket open. - if fcntl is not None and hasattr(fcntl, 'FD_CLOEXEC'): - flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD) - flags |= fcntl.FD_CLOEXEC - fcntl.fcntl(self.fileno(), fcntl.F_SETFD, flags) - -class MultiPathXMLRPCServer(SimpleXMLRPCServer): - """Multipath XML-RPC Server - This specialization of SimpleXMLRPCServer allows the user to create - multiple Dispatcher instances and assign them to different - HTTP request paths. This makes it possible to run two or more - 'virtual XML-RPC servers' at the same port. 
- Make sure that the requestHandler accepts the paths in question. - """ - def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, - logRequests=True, allow_none=False, encoding=None, bind_and_activate=True): - - SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, allow_none, - encoding, bind_and_activate) - self.dispatchers = {} - self.allow_none = allow_none - self.encoding = encoding - - def add_dispatcher(self, path, dispatcher): - self.dispatchers[path] = dispatcher - return dispatcher - - def get_dispatcher(self, path): - return self.dispatchers[path] - - def _marshaled_dispatch(self, data, dispatch_method = None, path = None): - try: - response = self.dispatchers[path]._marshaled_dispatch( - data, dispatch_method, path) - except: - # report low level exception back to server - # (each dispatcher should have handled their own - # exceptions) - exc_type, exc_value = sys.exc_info()[:2] - response = xmlrpclib.dumps( - xmlrpclib.Fault(1, "%s:%s" % (exc_type, exc_value)), - encoding=self.encoding, allow_none=self.allow_none) - return response - -class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher): - """Simple handler for XML-RPC data passed through CGI.""" - - def __init__(self, allow_none=False, encoding=None): - SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding) - - def handle_xmlrpc(self, request_text): - """Handle a single XML-RPC request""" - - response = self._marshaled_dispatch(request_text) - - print 'Content-Type: text/xml' - print 'Content-Length: %d' % len(response) - print - sys.stdout.write(response) - - def handle_get(self): - """Handle a single HTTP GET request. - - Default implementation indicates an error because - XML-RPC uses the POST method. 
- """ - - code = 400 - message, explain = \ - BaseHTTPServer.BaseHTTPRequestHandler.responses[code] - - response = BaseHTTPServer.DEFAULT_ERROR_MESSAGE % \ - { - 'code' : code, - 'message' : message, - 'explain' : explain - } - print 'Status: %d %s' % (code, message) - print 'Content-Type: %s' % BaseHTTPServer.DEFAULT_ERROR_CONTENT_TYPE - print 'Content-Length: %d' % len(response) - print - sys.stdout.write(response) - - def handle_request(self, request_text = None): - """Handle a single XML-RPC request passed through a CGI post method. - - If no XML data is given then it is read from stdin. The resulting - XML-RPC response is printed to stdout along with the correct HTTP - headers. - """ - - if request_text is None and \ - os.environ.get('REQUEST_METHOD', None) == 'GET': - self.handle_get() - else: - # POST data is normally available through stdin - try: - length = int(os.environ.get('CONTENT_LENGTH', None)) - except (TypeError, ValueError): - length = -1 - if request_text is None: - request_text = sys.stdin.read(length) - - self.handle_xmlrpc(request_text) - -if __name__ == '__main__': - print 'Running XML-RPC server on port 8000' - server = SimpleXMLRPCServer(("localhost", 8000)) - server.register_function(pow) - server.register_function(lambda x,y: x+y, 'add') - server.register_multicall_functions() - server.serve_forever() diff --git a/python/Lib/SocketServer.py b/python/Lib/SocketServer.py deleted file mode 100755 index 122430e362..0000000000 --- a/python/Lib/SocketServer.py +++ /dev/null @@ -1,731 +0,0 @@ -"""Generic socket server classes. - -This module tries to capture the various aspects of defining a server: - -For socket-based servers: - -- address family: - - AF_INET{,6}: IP (Internet Protocol) sockets (default) - - AF_UNIX: Unix domain sockets - - others, e.g. AF_DECNET are conceivable (see -- socket type: - - SOCK_STREAM (reliable stream, e.g. TCP) - - SOCK_DGRAM (datagrams, e.g. 
UDP) - -For request-based servers (including socket-based): - -- client address verification before further looking at the request - (This is actually a hook for any processing that needs to look - at the request before anything else, e.g. logging) -- how to handle multiple requests: - - synchronous (one request is handled at a time) - - forking (each request is handled by a new process) - - threading (each request is handled by a new thread) - -The classes in this module favor the server type that is simplest to -write: a synchronous TCP/IP server. This is bad class design, but -save some typing. (There's also the issue that a deep class hierarchy -slows down method lookups.) - -There are five classes in an inheritance diagram, four of which represent -synchronous servers of four types: - - +------------+ - | BaseServer | - +------------+ - | - v - +-----------+ +------------------+ - | TCPServer |------->| UnixStreamServer | - +-----------+ +------------------+ - | - v - +-----------+ +--------------------+ - | UDPServer |------->| UnixDatagramServer | - +-----------+ +--------------------+ - -Note that UnixDatagramServer derives from UDPServer, not from -UnixStreamServer -- the only difference between an IP and a Unix -stream server is the address family, which is simply repeated in both -unix server classes. - -Forking and threading versions of each type of server can be created -using the ForkingMixIn and ThreadingMixIn mix-in classes. For -instance, a threading UDP server class is created as follows: - - class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass - -The Mix-in class must come first, since it overrides a method defined -in UDPServer! Setting the various member variables also changes -the behavior of the underlying server mechanism. - -To implement a service, you must derive a class from -BaseRequestHandler and redefine its handle() method. 
You can then run -various versions of the service by combining one of the server classes -with your request handler class. - -The request handler class must be different for datagram or stream -services. This can be hidden by using the request handler -subclasses StreamRequestHandler or DatagramRequestHandler. - -Of course, you still have to use your head! - -For instance, it makes no sense to use a forking server if the service -contains state in memory that can be modified by requests (since the -modifications in the child process would never reach the initial state -kept in the parent process and passed to each child). In this case, -you can use a threading server, but you will probably have to use -locks to avoid two requests that come in nearly simultaneous to apply -conflicting changes to the server state. - -On the other hand, if you are building e.g. an HTTP server, where all -data is stored externally (e.g. in the file system), a synchronous -class will essentially render the service "deaf" while one request is -being handled -- which may be for a very long time if a client is slow -to read all the data it has requested. Here a threading or forking -server is appropriate. - -In some cases, it may be appropriate to process part of a request -synchronously, but to finish processing in a forked child depending on -the request data. This can be implemented by using a synchronous -server and doing an explicit fork in the request handler class -handle() method. - -Another approach to handling multiple simultaneous requests in an -environment that supports neither threads nor fork (or where these are -too expensive or inappropriate for the service) is to maintain an -explicit table of partially finished requests and to use select() to -decide which request to work on next (or whether to handle a new -incoming request). 
This is particularly important for stream services -where each client can potentially be connected for a long time (if -threads or subprocesses cannot be used). - -Future work: -- Standard classes for Sun RPC (which uses either UDP or TCP) -- Standard mix-in classes to implement various authentication - and encryption schemes -- Standard framework for select-based multiplexing - -XXX Open problems: -- What to do with out-of-band data? - -BaseServer: -- split generic "request" functionality out into BaseServer class. - Copyright (C) 2000 Luke Kenneth Casson Leighton - - example: read entries from a SQL database (requires overriding - get_request() to return a table entry from the database). - entry is processed by a RequestHandlerClass. - -""" - -# Author of the BaseServer patch: Luke Kenneth Casson Leighton - -__version__ = "0.4" - - -import socket -import select -import sys -import os -import errno -try: - import threading -except ImportError: - import dummy_threading as threading - -__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer", - "ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler", - "StreamRequestHandler","DatagramRequestHandler", - "ThreadingMixIn", "ForkingMixIn"] -if hasattr(socket, "AF_UNIX"): - __all__.extend(["UnixStreamServer","UnixDatagramServer", - "ThreadingUnixStreamServer", - "ThreadingUnixDatagramServer"]) - -def _eintr_retry(func, *args): - """restart a system call interrupted by EINTR""" - while True: - try: - return func(*args) - except (OSError, select.error) as e: - if e.args[0] != errno.EINTR: - raise - -class BaseServer: - - """Base class for server classes. 
- - Methods for the caller: - - - __init__(server_address, RequestHandlerClass) - - serve_forever(poll_interval=0.5) - - shutdown() - - handle_request() # if you do not use serve_forever() - - fileno() -> int # for select() - - Methods that may be overridden: - - - server_bind() - - server_activate() - - get_request() -> request, client_address - - handle_timeout() - - verify_request(request, client_address) - - server_close() - - process_request(request, client_address) - - shutdown_request(request) - - close_request(request) - - handle_error() - - Methods for derived classes: - - - finish_request(request, client_address) - - Class variables that may be overridden by derived classes or - instances: - - - timeout - - address_family - - socket_type - - allow_reuse_address - - Instance variables: - - - RequestHandlerClass - - socket - - """ - - timeout = None - - def __init__(self, server_address, RequestHandlerClass): - """Constructor. May be extended, do not override.""" - self.server_address = server_address - self.RequestHandlerClass = RequestHandlerClass - self.__is_shut_down = threading.Event() - self.__shutdown_request = False - - def server_activate(self): - """Called by constructor to activate the server. - - May be overridden. - - """ - pass - - def serve_forever(self, poll_interval=0.5): - """Handle one request at a time until shutdown. - - Polls for shutdown every poll_interval seconds. Ignores - self.timeout. If you need to do periodic tasks, do them in - another thread. - """ - self.__is_shut_down.clear() - try: - while not self.__shutdown_request: - # XXX: Consider using another file descriptor or - # connecting to the socket to wake this up instead of - # polling. Polling reduces our responsiveness to a - # shutdown request and wastes cpu at all other times. 
- r, w, e = _eintr_retry(select.select, [self], [], [], - poll_interval) - if self in r: - self._handle_request_noblock() - finally: - self.__shutdown_request = False - self.__is_shut_down.set() - - def shutdown(self): - """Stops the serve_forever loop. - - Blocks until the loop has finished. This must be called while - serve_forever() is running in another thread, or it will - deadlock. - """ - self.__shutdown_request = True - self.__is_shut_down.wait() - - # The distinction between handling, getting, processing and - # finishing a request is fairly arbitrary. Remember: - # - # - handle_request() is the top-level call. It calls - # select, get_request(), verify_request() and process_request() - # - get_request() is different for stream or datagram sockets - # - process_request() is the place that may fork a new process - # or create a new thread to finish the request - # - finish_request() instantiates the request handler class; - # this constructor will handle the request all by itself - - def handle_request(self): - """Handle one request, possibly blocking. - - Respects self.timeout. - """ - # Support people who used socket.settimeout() to escape - # handle_request before self.timeout was available. - timeout = self.socket.gettimeout() - if timeout is None: - timeout = self.timeout - elif self.timeout is not None: - timeout = min(timeout, self.timeout) - fd_sets = _eintr_retry(select.select, [self], [], [], timeout) - if not fd_sets[0]: - self.handle_timeout() - return - self._handle_request_noblock() - - def _handle_request_noblock(self): - """Handle one request, without blocking. - - I assume that select.select has returned that the socket is - readable before this function was called, so there should be - no risk of blocking in get_request(). 
- """ - try: - request, client_address = self.get_request() - except socket.error: - return - if self.verify_request(request, client_address): - try: - self.process_request(request, client_address) - except: - self.handle_error(request, client_address) - self.shutdown_request(request) - else: - self.shutdown_request(request) - - def handle_timeout(self): - """Called if no new request arrives within self.timeout. - - Overridden by ForkingMixIn. - """ - pass - - def verify_request(self, request, client_address): - """Verify the request. May be overridden. - - Return True if we should proceed with this request. - - """ - return True - - def process_request(self, request, client_address): - """Call finish_request. - - Overridden by ForkingMixIn and ThreadingMixIn. - - """ - self.finish_request(request, client_address) - self.shutdown_request(request) - - def server_close(self): - """Called to clean-up the server. - - May be overridden. - - """ - pass - - def finish_request(self, request, client_address): - """Finish one request by instantiating RequestHandlerClass.""" - self.RequestHandlerClass(request, client_address, self) - - def shutdown_request(self, request): - """Called to shutdown and close an individual request.""" - self.close_request(request) - - def close_request(self, request): - """Called to clean up an individual request.""" - pass - - def handle_error(self, request, client_address): - """Handle an error gracefully. May be overridden. - - The default is to print a traceback and continue. - - """ - print '-'*40 - print 'Exception happened during processing of request from', - print client_address - import traceback - traceback.print_exc() # XXX But this goes to stderr! - print '-'*40 - - -class TCPServer(BaseServer): - - """Base class for various socket-based server classes. - - Defaults to synchronous IP stream (i.e., TCP). 
- - Methods for the caller: - - - __init__(server_address, RequestHandlerClass, bind_and_activate=True) - - serve_forever(poll_interval=0.5) - - shutdown() - - handle_request() # if you don't use serve_forever() - - fileno() -> int # for select() - - Methods that may be overridden: - - - server_bind() - - server_activate() - - get_request() -> request, client_address - - handle_timeout() - - verify_request(request, client_address) - - process_request(request, client_address) - - shutdown_request(request) - - close_request(request) - - handle_error() - - Methods for derived classes: - - - finish_request(request, client_address) - - Class variables that may be overridden by derived classes or - instances: - - - timeout - - address_family - - socket_type - - request_queue_size (only for stream sockets) - - allow_reuse_address - - Instance variables: - - - server_address - - RequestHandlerClass - - socket - - """ - - address_family = socket.AF_INET - - socket_type = socket.SOCK_STREAM - - request_queue_size = 5 - - allow_reuse_address = False - - def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True): - """Constructor. May be extended, do not override.""" - BaseServer.__init__(self, server_address, RequestHandlerClass) - self.socket = socket.socket(self.address_family, - self.socket_type) - if bind_and_activate: - try: - self.server_bind() - self.server_activate() - except: - self.server_close() - raise - - def server_bind(self): - """Called by constructor to bind the socket. - - May be overridden. - - """ - if self.allow_reuse_address: - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.socket.bind(self.server_address) - self.server_address = self.socket.getsockname() - - def server_activate(self): - """Called by constructor to activate the server. - - May be overridden. - - """ - self.socket.listen(self.request_queue_size) - - def server_close(self): - """Called to clean-up the server. - - May be overridden. 
- - """ - self.socket.close() - - def fileno(self): - """Return socket file number. - - Interface required by select(). - - """ - return self.socket.fileno() - - def get_request(self): - """Get the request and client address from the socket. - - May be overridden. - - """ - return self.socket.accept() - - def shutdown_request(self, request): - """Called to shutdown and close an individual request.""" - try: - #explicitly shutdown. socket.close() merely releases - #the socket and waits for GC to perform the actual close. - request.shutdown(socket.SHUT_WR) - except socket.error: - pass #some platforms may raise ENOTCONN here - self.close_request(request) - - def close_request(self, request): - """Called to clean up an individual request.""" - request.close() - - -class UDPServer(TCPServer): - - """UDP server class.""" - - allow_reuse_address = False - - socket_type = socket.SOCK_DGRAM - - max_packet_size = 8192 - - def get_request(self): - data, client_addr = self.socket.recvfrom(self.max_packet_size) - return (data, self.socket), client_addr - - def server_activate(self): - # No need to call listen() for UDP. - pass - - def shutdown_request(self, request): - # No need to shutdown anything. - self.close_request(request) - - def close_request(self, request): - # No need to close anything. - pass - -class ForkingMixIn: - - """Mix-in class to handle each request in a new process.""" - - timeout = 300 - active_children = None - max_children = 40 - - def collect_children(self): - """Internal routine to wait for children that have exited.""" - if self.active_children is None: - return - - # If we're above the max number of children, wait and reap them until - # we go back below threshold. Note that we use waitpid(-1) below to be - # able to collect children in size() syscalls instead - # of size(): the downside is that this might reap children - # which we didn't spawn, which is why we only resort to this when we're - # above max_children. 
- while len(self.active_children) >= self.max_children: - try: - pid, _ = os.waitpid(-1, 0) - self.active_children.discard(pid) - except OSError as e: - if e.errno == errno.ECHILD: - # we don't have any children, we're done - self.active_children.clear() - elif e.errno != errno.EINTR: - break - - # Now reap all defunct children. - for pid in self.active_children.copy(): - try: - pid, _ = os.waitpid(pid, os.WNOHANG) - # if the child hasn't exited yet, pid will be 0 and ignored by - # discard() below - self.active_children.discard(pid) - except OSError as e: - if e.errno == errno.ECHILD: - # someone else reaped it - self.active_children.discard(pid) - - def handle_timeout(self): - """Wait for zombies after self.timeout seconds of inactivity. - - May be extended, do not override. - """ - self.collect_children() - - def process_request(self, request, client_address): - """Fork a new subprocess to process the request.""" - self.collect_children() - pid = os.fork() - if pid: - # Parent process - if self.active_children is None: - self.active_children = set() - self.active_children.add(pid) - self.close_request(request) #close handle in parent process - return - else: - # Child process. - # This must never return, hence os._exit()! - try: - self.finish_request(request, client_address) - self.shutdown_request(request) - os._exit(0) - except: - try: - self.handle_error(request, client_address) - self.shutdown_request(request) - finally: - os._exit(1) - - -class ThreadingMixIn: - """Mix-in class to handle each request in a new thread.""" - - # Decides how threads will act upon termination of the - # main process - daemon_threads = False - - def process_request_thread(self, request, client_address): - """Same as in BaseServer but as a thread. - - In addition, exception handling is done here. 
- - """ - try: - self.finish_request(request, client_address) - self.shutdown_request(request) - except: - self.handle_error(request, client_address) - self.shutdown_request(request) - - def process_request(self, request, client_address): - """Start a new thread to process the request.""" - t = threading.Thread(target = self.process_request_thread, - args = (request, client_address)) - t.daemon = self.daemon_threads - t.start() - - -class ForkingUDPServer(ForkingMixIn, UDPServer): pass -class ForkingTCPServer(ForkingMixIn, TCPServer): pass - -class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass -class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass - -if hasattr(socket, 'AF_UNIX'): - - class UnixStreamServer(TCPServer): - address_family = socket.AF_UNIX - - class UnixDatagramServer(UDPServer): - address_family = socket.AF_UNIX - - class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass - - class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass - -class BaseRequestHandler: - - """Base class for request handler classes. - - This class is instantiated for each request to be handled. The - constructor sets the instance variables request, client_address - and server, and then calls the handle() method. To implement a - specific service, all you need to do is to derive a class which - defines a handle() method. - - The handle() method can find the request as self.request, the - client address as self.client_address, and the server (in case it - needs access to per-server information) as self.server. Since a - separate instance is created for each request, the handle() method - can define other arbitrary instance variables. 
- - """ - - def __init__(self, request, client_address, server): - self.request = request - self.client_address = client_address - self.server = server - self.setup() - try: - self.handle() - finally: - self.finish() - - def setup(self): - pass - - def handle(self): - pass - - def finish(self): - pass - - -# The following two classes make it possible to use the same service -# class for stream or datagram servers. -# Each class sets up these instance variables: -# - rfile: a file object from which receives the request is read -# - wfile: a file object to which the reply is written -# When the handle() method returns, wfile is flushed properly - - -class StreamRequestHandler(BaseRequestHandler): - - """Define self.rfile and self.wfile for stream sockets.""" - - # Default buffer sizes for rfile, wfile. - # We default rfile to buffered because otherwise it could be - # really slow for large data (a getc() call per byte); we make - # wfile unbuffered because (a) often after a write() we want to - # read and we need to flush the line; (b) big writes to unbuffered - # files are typically optimized by stdio even when big reads - # aren't. - rbufsize = -1 - wbufsize = 0 - - # A timeout to apply to the request socket, if not None. - timeout = None - - # Disable nagle algorithm for this socket, if True. - # Use only when wbufsize != 0, to avoid small packets. - disable_nagle_algorithm = False - - def setup(self): - self.connection = self.request - if self.timeout is not None: - self.connection.settimeout(self.timeout) - if self.disable_nagle_algorithm: - self.connection.setsockopt(socket.IPPROTO_TCP, - socket.TCP_NODELAY, True) - self.rfile = self.connection.makefile('rb', self.rbufsize) - self.wfile = self.connection.makefile('wb', self.wbufsize) - - def finish(self): - if not self.wfile.closed: - try: - self.wfile.flush() - except socket.error: - # A final socket error may have occurred here, such as - # the local error ECONNABORTED. 
- pass - self.wfile.close() - self.rfile.close() - - -class DatagramRequestHandler(BaseRequestHandler): - - """Define self.rfile and self.wfile for datagram sockets.""" - - def setup(self): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - self.packet, self.socket = self.request - self.rfile = StringIO(self.packet) - self.wfile = StringIO() - - def finish(self): - self.socket.sendto(self.wfile.getvalue(), self.client_address) diff --git a/python/Lib/StringIO.py b/python/Lib/StringIO.py deleted file mode 100755 index b63525b9bf..0000000000 --- a/python/Lib/StringIO.py +++ /dev/null @@ -1,324 +0,0 @@ -r"""File-like objects that read from or write to a string buffer. - -This implements (nearly) all stdio methods. - -f = StringIO() # ready for writing -f = StringIO(buf) # ready for reading -f.close() # explicitly release resources held -flag = f.isatty() # always false -pos = f.tell() # get current position -f.seek(pos) # set current position -f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF -buf = f.read() # read until EOF -buf = f.read(n) # read up to n bytes -buf = f.readline() # read until end of line ('\n') or EOF -list = f.readlines()# list of f.readline() results until EOF -f.truncate([size]) # truncate file at to at most size (default: current pos) -f.write(buf) # write at current position -f.writelines(list) # for line in list: f.write(line) -f.getvalue() # return whole file's contents as a string - -Notes: -- Using a real file is often faster (but less convenient). -- There's also a much faster implementation in C, called cStringIO, but - it's not subclassable. -- fileno() is left unimplemented so that code which uses it triggers - an exception early. -- Seeking far beyond EOF and then writing will insert real null - bytes that occupy space in the buffer. -- There's a simple test set (see end of this file). 
-""" -try: - from errno import EINVAL -except ImportError: - EINVAL = 22 - -__all__ = ["StringIO"] - -def _complain_ifclosed(closed): - if closed: - raise ValueError, "I/O operation on closed file" - -class StringIO: - """class StringIO([buffer]) - - When a StringIO object is created, it can be initialized to an existing - string by passing the string to the constructor. If no string is given, - the StringIO will start empty. - - The StringIO object can accept either Unicode or 8-bit strings, but - mixing the two may take some care. If both are used, 8-bit strings that - cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause - a UnicodeError to be raised when getvalue() is called. - """ - def __init__(self, buf = ''): - # Force self.buf to be a string or unicode - if not isinstance(buf, basestring): - buf = str(buf) - self.buf = buf - self.len = len(buf) - self.buflist = [] - self.pos = 0 - self.closed = False - self.softspace = 0 - - def __iter__(self): - return self - - def next(self): - """A file object is its own iterator, for example iter(f) returns f - (unless f is closed). When a file is used as an iterator, typically - in a for loop (for example, for line in f: print line), the next() - method is called repeatedly. This method returns the next input line, - or raises StopIteration when EOF is hit. - """ - _complain_ifclosed(self.closed) - r = self.readline() - if not r: - raise StopIteration - return r - - def close(self): - """Free the memory buffer. - """ - if not self.closed: - self.closed = True - del self.buf, self.pos - - def isatty(self): - """Returns False because StringIO objects are not connected to a - tty-like device. - """ - _complain_ifclosed(self.closed) - return False - - def seek(self, pos, mode = 0): - """Set the file's current position. - - The mode argument is optional and defaults to 0 (absolute file - positioning); other values are 1 (seek relative to the current - position) and 2 (seek relative to the file's end). 
- - There is no return value. - """ - _complain_ifclosed(self.closed) - if self.buflist: - self.buf += ''.join(self.buflist) - self.buflist = [] - if mode == 1: - pos += self.pos - elif mode == 2: - pos += self.len - self.pos = max(0, pos) - - def tell(self): - """Return the file's current position.""" - _complain_ifclosed(self.closed) - return self.pos - - def read(self, n = -1): - """Read at most size bytes from the file - (less if the read hits EOF before obtaining size bytes). - - If the size argument is negative or omitted, read all data until EOF - is reached. The bytes are returned as a string object. An empty - string is returned when EOF is encountered immediately. - """ - _complain_ifclosed(self.closed) - if self.buflist: - self.buf += ''.join(self.buflist) - self.buflist = [] - if n is None or n < 0: - newpos = self.len - else: - newpos = min(self.pos+n, self.len) - r = self.buf[self.pos:newpos] - self.pos = newpos - return r - - def readline(self, length=None): - r"""Read one entire line from the file. - - A trailing newline character is kept in the string (but may be absent - when a file ends with an incomplete line). If the size argument is - present and non-negative, it is a maximum byte count (including the - trailing newline) and an incomplete line may be returned. - - An empty string is returned only when EOF is encountered immediately. - - Note: Unlike stdio's fgets(), the returned string contains null - characters ('\0') if they occurred in the input. - """ - _complain_ifclosed(self.closed) - if self.buflist: - self.buf += ''.join(self.buflist) - self.buflist = [] - i = self.buf.find('\n', self.pos) - if i < 0: - newpos = self.len - else: - newpos = i+1 - if length is not None and length >= 0: - if self.pos + length < newpos: - newpos = self.pos + length - r = self.buf[self.pos:newpos] - self.pos = newpos - return r - - def readlines(self, sizehint = 0): - """Read until EOF using readline() and return a list containing the - lines thus read. 
- - If the optional sizehint argument is present, instead of reading up - to EOF, whole lines totalling approximately sizehint bytes (or more - to accommodate a final whole line). - """ - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def truncate(self, size=None): - """Truncate the file's size. - - If the optional size argument is present, the file is truncated to - (at most) that size. The size defaults to the current position. - The current file position is not changed unless the position - is beyond the new file size. - - If the specified size exceeds the file's current size, the - file remains unchanged. - """ - _complain_ifclosed(self.closed) - if size is None: - size = self.pos - elif size < 0: - raise IOError(EINVAL, "Negative size not allowed") - elif size < self.pos: - self.pos = size - self.buf = self.getvalue()[:size] - self.len = size - - def write(self, s): - """Write a string to the file. - - There is no return value. - """ - _complain_ifclosed(self.closed) - if not s: return - # Force s to be a string or unicode - if not isinstance(s, basestring): - s = str(s) - spos = self.pos - slen = self.len - if spos == slen: - self.buflist.append(s) - self.len = self.pos = spos + len(s) - return - if spos > slen: - self.buflist.append('\0'*(spos - slen)) - slen = spos - newpos = spos + len(s) - if spos < slen: - if self.buflist: - self.buf += ''.join(self.buflist) - self.buflist = [self.buf[:spos], s, self.buf[newpos:]] - self.buf = '' - if newpos > slen: - slen = newpos - else: - self.buflist.append(s) - slen = newpos - self.len = slen - self.pos = newpos - - def writelines(self, iterable): - """Write a sequence of strings to the file. The sequence can be any - iterable object producing strings, typically a list of strings. There - is no return value. 
- - (The name is intended to match readlines(); writelines() does not add - line separators.) - """ - write = self.write - for line in iterable: - write(line) - - def flush(self): - """Flush the internal buffer - """ - _complain_ifclosed(self.closed) - - def getvalue(self): - """ - Retrieve the entire contents of the "file" at any time before - the StringIO object's close() method is called. - - The StringIO object can accept either Unicode or 8-bit strings, - but mixing the two may take some care. If both are used, 8-bit - strings that cannot be interpreted as 7-bit ASCII (that use the - 8th bit) will cause a UnicodeError to be raised when getvalue() - is called. - """ - _complain_ifclosed(self.closed) - if self.buflist: - self.buf += ''.join(self.buflist) - self.buflist = [] - return self.buf - - -# A little test suite - -def test(): - import sys - if sys.argv[1:]: - file = sys.argv[1] - else: - file = '/etc/passwd' - lines = open(file, 'r').readlines() - text = open(file, 'r').read() - f = StringIO() - for line in lines[:-2]: - f.write(line) - f.writelines(lines[-2:]) - if f.getvalue() != text: - raise RuntimeError, 'write failed' - length = f.tell() - print 'File length =', length - f.seek(len(lines[0])) - f.write(lines[1]) - f.seek(0) - print 'First line =', repr(f.readline()) - print 'Position =', f.tell() - line = f.readline() - print 'Second line =', repr(line) - f.seek(-len(line), 1) - line2 = f.read(len(line)) - if line != line2: - raise RuntimeError, 'bad result after seek back' - f.seek(len(line2), 1) - list = f.readlines() - line = list[-1] - f.seek(f.tell() - len(line)) - line2 = f.read() - if line != line2: - raise RuntimeError, 'bad result after seek back from EOF' - print 'Read', len(list), 'more lines' - print 'File length =', f.tell() - if f.tell() != length: - raise RuntimeError, 'bad length' - f.truncate(length/2) - f.seek(0, 2) - print 'Truncated length =', f.tell() - if f.tell() != length/2: - raise RuntimeError, 'truncate did not adjust 
length' - f.close() - -if __name__ == '__main__': - test() diff --git a/python/Lib/UserDict.py b/python/Lib/UserDict.py deleted file mode 100755 index 732b327c3e..0000000000 --- a/python/Lib/UserDict.py +++ /dev/null @@ -1,213 +0,0 @@ -"""A more or less complete user-defined wrapper around dictionary objects.""" - -class UserDict: - def __init__(*args, **kwargs): - if not args: - raise TypeError("descriptor '__init__' of 'UserDict' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - if args: - dict = args[0] - elif 'dict' in kwargs: - dict = kwargs.pop('dict') - import warnings - warnings.warn("Passing 'dict' as keyword argument is " - "deprecated", PendingDeprecationWarning, - stacklevel=2) - else: - dict = None - self.data = {} - if dict is not None: - self.update(dict) - if len(kwargs): - self.update(kwargs) - def __repr__(self): return repr(self.data) - def __cmp__(self, dict): - if isinstance(dict, UserDict): - return cmp(self.data, dict.data) - else: - return cmp(self.data, dict) - __hash__ = None # Avoid Py3k warning - def __len__(self): return len(self.data) - def __getitem__(self, key): - if key in self.data: - return self.data[key] - if hasattr(self.__class__, "__missing__"): - return self.__class__.__missing__(self, key) - raise KeyError(key) - def __setitem__(self, key, item): self.data[key] = item - def __delitem__(self, key): del self.data[key] - def clear(self): self.data.clear() - def copy(self): - if self.__class__ is UserDict: - return UserDict(self.data.copy()) - import copy - data = self.data - try: - self.data = {} - c = copy.copy(self) - finally: - self.data = data - c.update(self) - return c - def keys(self): return self.data.keys() - def items(self): return self.data.items() - def iteritems(self): return self.data.iteritems() - def iterkeys(self): return self.data.iterkeys() - def itervalues(self): return self.data.itervalues() - def 
values(self): return self.data.values() - def has_key(self, key): return key in self.data - def update(*args, **kwargs): - if not args: - raise TypeError("descriptor 'update' of 'UserDict' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - if args: - dict = args[0] - elif 'dict' in kwargs: - dict = kwargs.pop('dict') - import warnings - warnings.warn("Passing 'dict' as keyword argument is deprecated", - PendingDeprecationWarning, stacklevel=2) - else: - dict = None - if dict is None: - pass - elif isinstance(dict, UserDict): - self.data.update(dict.data) - elif isinstance(dict, type({})) or not hasattr(dict, 'items'): - self.data.update(dict) - else: - for k, v in dict.items(): - self[k] = v - if len(kwargs): - self.data.update(kwargs) - def get(self, key, failobj=None): - if key not in self: - return failobj - return self[key] - def setdefault(self, key, failobj=None): - if key not in self: - self[key] = failobj - return self[key] - def pop(self, key, *args): - return self.data.pop(key, *args) - def popitem(self): - return self.data.popitem() - def __contains__(self, key): - return key in self.data - @classmethod - def fromkeys(cls, iterable, value=None): - d = cls() - for key in iterable: - d[key] = value - return d - -class IterableUserDict(UserDict): - def __iter__(self): - return iter(self.data) - -import _abcoll -_abcoll.MutableMapping.register(IterableUserDict) - - -class DictMixin: - # Mixin defining all dictionary methods for classes that already have - # a minimum dictionary interface including getitem, setitem, delitem, - # and keys. Without knowledge of the subclass constructor, the mixin - # does not define __init__() or copy(). In addition to the four base - # methods, progressively more efficiency comes with defining - # __contains__(), __iter__(), and iteritems(). 
- - # second level definitions support higher levels - def __iter__(self): - for k in self.keys(): - yield k - def has_key(self, key): - try: - self[key] - except KeyError: - return False - return True - def __contains__(self, key): - return self.has_key(key) - - # third level takes advantage of second level definitions - def iteritems(self): - for k in self: - yield (k, self[k]) - def iterkeys(self): - return self.__iter__() - - # fourth level uses definitions from lower levels - def itervalues(self): - for _, v in self.iteritems(): - yield v - def values(self): - return [v for _, v in self.iteritems()] - def items(self): - return list(self.iteritems()) - def clear(self): - for key in self.keys(): - del self[key] - def setdefault(self, key, default=None): - try: - return self[key] - except KeyError: - self[key] = default - return default - def pop(self, key, *args): - if len(args) > 1: - raise TypeError, "pop expected at most 2 arguments, got "\ - + repr(1 + len(args)) - try: - value = self[key] - except KeyError: - if args: - return args[0] - raise - del self[key] - return value - def popitem(self): - try: - k, v = self.iteritems().next() - except StopIteration: - raise KeyError, 'container is empty' - del self[k] - return (k, v) - def update(self, other=None, **kwargs): - # Make progressively weaker assumptions about "other" - if other is None: - pass - elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups - for k, v in other.iteritems(): - self[k] = v - elif hasattr(other, 'keys'): - for k in other.keys(): - self[k] = other[k] - else: - for k, v in other: - self[k] = v - if kwargs: - self.update(kwargs) - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default - def __repr__(self): - return repr(dict(self.iteritems())) - def __cmp__(self, other): - if other is None: - return 1 - if isinstance(other, DictMixin): - other = dict(other.iteritems()) - return cmp(dict(self.iteritems()), other) - def 
__len__(self): - return len(self.keys()) diff --git a/python/Lib/UserList.py b/python/Lib/UserList.py deleted file mode 100755 index b445985707..0000000000 --- a/python/Lib/UserList.py +++ /dev/null @@ -1,88 +0,0 @@ -"""A more or less complete user-defined wrapper around list objects.""" - -import collections - -class UserList(collections.MutableSequence): - def __init__(self, initlist=None): - self.data = [] - if initlist is not None: - # XXX should this accept an arbitrary sequence? - if type(initlist) == type(self.data): - self.data[:] = initlist - elif isinstance(initlist, UserList): - self.data[:] = initlist.data[:] - else: - self.data = list(initlist) - def __repr__(self): return repr(self.data) - def __lt__(self, other): return self.data < self.__cast(other) - def __le__(self, other): return self.data <= self.__cast(other) - def __eq__(self, other): return self.data == self.__cast(other) - def __ne__(self, other): return self.data != self.__cast(other) - def __gt__(self, other): return self.data > self.__cast(other) - def __ge__(self, other): return self.data >= self.__cast(other) - def __cast(self, other): - if isinstance(other, UserList): return other.data - else: return other - def __cmp__(self, other): - return cmp(self.data, self.__cast(other)) - __hash__ = None # Mutable sequence, so not hashable - def __contains__(self, item): return item in self.data - def __len__(self): return len(self.data) - def __getitem__(self, i): return self.data[i] - def __setitem__(self, i, item): self.data[i] = item - def __delitem__(self, i): del self.data[i] - def __getslice__(self, i, j): - i = max(i, 0); j = max(j, 0) - return self.__class__(self.data[i:j]) - def __setslice__(self, i, j, other): - i = max(i, 0); j = max(j, 0) - if isinstance(other, UserList): - self.data[i:j] = other.data - elif isinstance(other, type(self.data)): - self.data[i:j] = other - else: - self.data[i:j] = list(other) - def __delslice__(self, i, j): - i = max(i, 0); j = max(j, 0) - del 
self.data[i:j] - def __add__(self, other): - if isinstance(other, UserList): - return self.__class__(self.data + other.data) - elif isinstance(other, type(self.data)): - return self.__class__(self.data + other) - else: - return self.__class__(self.data + list(other)) - def __radd__(self, other): - if isinstance(other, UserList): - return self.__class__(other.data + self.data) - elif isinstance(other, type(self.data)): - return self.__class__(other + self.data) - else: - return self.__class__(list(other) + self.data) - def __iadd__(self, other): - if isinstance(other, UserList): - self.data += other.data - elif isinstance(other, type(self.data)): - self.data += other - else: - self.data += list(other) - return self - def __mul__(self, n): - return self.__class__(self.data*n) - __rmul__ = __mul__ - def __imul__(self, n): - self.data *= n - return self - def append(self, item): self.data.append(item) - def insert(self, i, item): self.data.insert(i, item) - def pop(self, i=-1): return self.data.pop(i) - def remove(self, item): self.data.remove(item) - def count(self, item): return self.data.count(item) - def index(self, item, *args): return self.data.index(item, *args) - def reverse(self): self.data.reverse() - def sort(self, *args, **kwds): self.data.sort(*args, **kwds) - def extend(self, other): - if isinstance(other, UserList): - self.data.extend(other.data) - else: - self.data.extend(other) diff --git a/python/Lib/UserString.py b/python/Lib/UserString.py deleted file mode 100755 index 726b3f7d3c..0000000000 --- a/python/Lib/UserString.py +++ /dev/null @@ -1,228 +0,0 @@ -#!/usr/bin/env python -## vim:ts=4:et:nowrap -"""A user-defined wrapper around string objects - -Note: string objects have grown methods in Python 1.6 -This module requires Python 1.6 or later. 
-""" -import sys -import collections - -__all__ = ["UserString","MutableString"] - -class UserString(collections.Sequence): - def __init__(self, seq): - if isinstance(seq, basestring): - self.data = seq - elif isinstance(seq, UserString): - self.data = seq.data[:] - else: - self.data = str(seq) - def __str__(self): return str(self.data) - def __repr__(self): return repr(self.data) - def __int__(self): return int(self.data) - def __long__(self): return long(self.data) - def __float__(self): return float(self.data) - def __complex__(self): return complex(self.data) - def __hash__(self): return hash(self.data) - - def __cmp__(self, string): - if isinstance(string, UserString): - return cmp(self.data, string.data) - else: - return cmp(self.data, string) - def __contains__(self, char): - return char in self.data - - def __len__(self): return len(self.data) - def __getitem__(self, index): return self.__class__(self.data[index]) - def __getslice__(self, start, end): - start = max(start, 0); end = max(end, 0) - return self.__class__(self.data[start:end]) - - def __add__(self, other): - if isinstance(other, UserString): - return self.__class__(self.data + other.data) - elif isinstance(other, basestring): - return self.__class__(self.data + other) - else: - return self.__class__(self.data + str(other)) - def __radd__(self, other): - if isinstance(other, basestring): - return self.__class__(other + self.data) - else: - return self.__class__(str(other) + self.data) - def __mul__(self, n): - return self.__class__(self.data*n) - __rmul__ = __mul__ - def __mod__(self, args): - return self.__class__(self.data % args) - - # the following methods are defined in alphabetical order: - def capitalize(self): return self.__class__(self.data.capitalize()) - def center(self, width, *args): - return self.__class__(self.data.center(width, *args)) - def count(self, sub, start=0, end=sys.maxint): - return self.data.count(sub, start, end) - def decode(self, encoding=None, errors=None): # XXX 
improve this? - if encoding: - if errors: - return self.__class__(self.data.decode(encoding, errors)) - else: - return self.__class__(self.data.decode(encoding)) - else: - return self.__class__(self.data.decode()) - def encode(self, encoding=None, errors=None): # XXX improve this? - if encoding: - if errors: - return self.__class__(self.data.encode(encoding, errors)) - else: - return self.__class__(self.data.encode(encoding)) - else: - return self.__class__(self.data.encode()) - def endswith(self, suffix, start=0, end=sys.maxint): - return self.data.endswith(suffix, start, end) - def expandtabs(self, tabsize=8): - return self.__class__(self.data.expandtabs(tabsize)) - def find(self, sub, start=0, end=sys.maxint): - return self.data.find(sub, start, end) - def index(self, sub, start=0, end=sys.maxint): - return self.data.index(sub, start, end) - def isalpha(self): return self.data.isalpha() - def isalnum(self): return self.data.isalnum() - def isdecimal(self): return self.data.isdecimal() - def isdigit(self): return self.data.isdigit() - def islower(self): return self.data.islower() - def isnumeric(self): return self.data.isnumeric() - def isspace(self): return self.data.isspace() - def istitle(self): return self.data.istitle() - def isupper(self): return self.data.isupper() - def join(self, seq): return self.data.join(seq) - def ljust(self, width, *args): - return self.__class__(self.data.ljust(width, *args)) - def lower(self): return self.__class__(self.data.lower()) - def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars)) - def partition(self, sep): - return self.data.partition(sep) - def replace(self, old, new, maxsplit=-1): - return self.__class__(self.data.replace(old, new, maxsplit)) - def rfind(self, sub, start=0, end=sys.maxint): - return self.data.rfind(sub, start, end) - def rindex(self, sub, start=0, end=sys.maxint): - return self.data.rindex(sub, start, end) - def rjust(self, width, *args): - return 
self.__class__(self.data.rjust(width, *args)) - def rpartition(self, sep): - return self.data.rpartition(sep) - def rstrip(self, chars=None): return self.__class__(self.data.rstrip(chars)) - def split(self, sep=None, maxsplit=-1): - return self.data.split(sep, maxsplit) - def rsplit(self, sep=None, maxsplit=-1): - return self.data.rsplit(sep, maxsplit) - def splitlines(self, keepends=0): return self.data.splitlines(keepends) - def startswith(self, prefix, start=0, end=sys.maxint): - return self.data.startswith(prefix, start, end) - def strip(self, chars=None): return self.__class__(self.data.strip(chars)) - def swapcase(self): return self.__class__(self.data.swapcase()) - def title(self): return self.__class__(self.data.title()) - def translate(self, *args): - return self.__class__(self.data.translate(*args)) - def upper(self): return self.__class__(self.data.upper()) - def zfill(self, width): return self.__class__(self.data.zfill(width)) - -class MutableString(UserString, collections.MutableSequence): - """mutable string objects - - Python strings are immutable objects. This has the advantage, that - strings may be used as dictionary keys. If this property isn't needed - and you insist on changing string values in place instead, you may cheat - and use MutableString. - - But the purpose of this class is an educational one: to prevent - people from inventing their own mutable string class derived - from UserString and than forget thereby to remove (override) the - __hash__ method inherited from UserString. This would lead to - errors that would be very hard to track down. 
- - A faster and better solution is to rewrite your program using lists.""" - def __init__(self, string=""): - from warnings import warnpy3k - warnpy3k('the class UserString.MutableString has been removed in ' - 'Python 3.0', stacklevel=2) - self.data = string - - # We inherit object.__hash__, so we must deny this explicitly - __hash__ = None - - def __setitem__(self, index, sub): - if isinstance(index, slice): - if isinstance(sub, UserString): - sub = sub.data - elif not isinstance(sub, basestring): - sub = str(sub) - start, stop, step = index.indices(len(self.data)) - if step == -1: - start, stop = stop+1, start+1 - sub = sub[::-1] - elif step != 1: - # XXX(twouters): I guess we should be reimplementing - # the extended slice assignment/deletion algorithm here... - raise TypeError, "invalid step in slicing assignment" - start = min(start, stop) - self.data = self.data[:start] + sub + self.data[stop:] - else: - if index < 0: - index += len(self.data) - if index < 0 or index >= len(self.data): raise IndexError - self.data = self.data[:index] + sub + self.data[index+1:] - def __delitem__(self, index): - if isinstance(index, slice): - start, stop, step = index.indices(len(self.data)) - if step == -1: - start, stop = stop+1, start+1 - elif step != 1: - # XXX(twouters): see same block in __setitem__ - raise TypeError, "invalid step in slicing deletion" - start = min(start, stop) - self.data = self.data[:start] + self.data[stop:] - else: - if index < 0: - index += len(self.data) - if index < 0 or index >= len(self.data): raise IndexError - self.data = self.data[:index] + self.data[index+1:] - def __setslice__(self, start, end, sub): - start = max(start, 0); end = max(end, 0) - if isinstance(sub, UserString): - self.data = self.data[:start]+sub.data+self.data[end:] - elif isinstance(sub, basestring): - self.data = self.data[:start]+sub+self.data[end:] - else: - self.data = self.data[:start]+str(sub)+self.data[end:] - def __delslice__(self, start, end): - start = 
max(start, 0); end = max(end, 0) - self.data = self.data[:start] + self.data[end:] - def immutable(self): - return UserString(self.data) - def __iadd__(self, other): - if isinstance(other, UserString): - self.data += other.data - elif isinstance(other, basestring): - self.data += other - else: - self.data += str(other) - return self - def __imul__(self, n): - self.data *= n - return self - def insert(self, index, value): - self[index:index] = value - -if __name__ == "__main__": - # execute the regression test to stdout, if called as a script: - import os - called_in_dir, called_as = os.path.split(sys.argv[0]) - called_as, py = os.path.splitext(called_as) - if '-q' in sys.argv: - from test import test_support - test_support.verbose = 0 - __import__('test.test_' + called_as.lower()) diff --git a/python/Lib/_LWPCookieJar.py b/python/Lib/_LWPCookieJar.py deleted file mode 100755 index d91cb51404..0000000000 --- a/python/Lib/_LWPCookieJar.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Load / save to libwww-perl (LWP) format files. - -Actually, the format is slightly extended from that used by LWP's -(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information -not recorded by LWP. - -It uses the version string "2.0", though really there isn't an LWP Cookies -2.0 format. This indicates that there is extra information in here -(domain_dot and # port_spec) while still being compatible with -libwww-perl, I hope. - -""" - -import time, re -from cookielib import (_warn_unhandled_exception, FileCookieJar, LoadError, - Cookie, MISSING_FILENAME_TEXT, - join_header_words, split_header_words, - iso2time, time2isoz) - -def lwp_cookie_str(cookie): - """Return string representation of Cookie in the LWP cookie file format. - - Actually, the format is extended a bit -- see module docstring. 
- - """ - h = [(cookie.name, cookie.value), - ("path", cookie.path), - ("domain", cookie.domain)] - if cookie.port is not None: h.append(("port", cookie.port)) - if cookie.path_specified: h.append(("path_spec", None)) - if cookie.port_specified: h.append(("port_spec", None)) - if cookie.domain_initial_dot: h.append(("domain_dot", None)) - if cookie.secure: h.append(("secure", None)) - if cookie.expires: h.append(("expires", - time2isoz(float(cookie.expires)))) - if cookie.discard: h.append(("discard", None)) - if cookie.comment: h.append(("comment", cookie.comment)) - if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - - keys = cookie._rest.keys() - keys.sort() - for k in keys: - h.append((k, str(cookie._rest[k]))) - - h.append(("version", str(cookie.version))) - - return join_header_words([h]) - -class LWPCookieJar(FileCookieJar): - """ - The LWPCookieJar saves a sequence of "Set-Cookie3" lines. - "Set-Cookie3" is the format used by the libwww-perl library, not known - to be compatible with any browser, but which is easy to read and - doesn't lose information about RFC 2965 cookies. - - Additional methods - - as_lwp_str(ignore_discard=True, ignore_expired=True) - - """ - - def as_lwp_str(self, ignore_discard=True, ignore_expires=True): - """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. 
- - ignore_discard and ignore_expires: see docstring for FileCookieJar.save - - """ - now = time.time() - r = [] - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) - return "\n".join(r+[""]) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - # There really isn't an LWP Cookies 2.0 format, but this indicates - # that there is extra information in here (domain_dot and - # port_spec) while still being compatible with libwww-perl, I hope. - f.write("#LWP-Cookies-2.0\n") - f.write(self.as_lwp_str(ignore_discard, ignore_expires)) - finally: - f.close() - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - magic = f.readline() - if not re.search(self.magic_re, magic): - msg = ("%r does not look like a Set-Cookie3 (LWP) format " - "file" % filename) - raise LoadError(msg) - - now = time.time() - - header = "Set-Cookie3:" - boolean_attrs = ("port_spec", "path_spec", "domain_dot", - "secure", "discard") - value_attrs = ("version", - "port", "path", "domain", - "expires", - "comment", "commenturl") - - try: - while 1: - line = f.readline() - if line == "": break - if not line.startswith(header): - continue - line = line[len(header):].strip() - - for data in split_header_words([line]): - name, value = data[0] - standard = {} - rest = {} - for k in boolean_attrs: - standard[k] = False - for k, v in data[1:]: - if k is not None: - lc = k.lower() - else: - lc = None - # don't lose case distinction for unknown fields - if (lc in value_attrs) or (lc in boolean_attrs): - k = lc - if k in boolean_attrs: - if v is None: v = True - standard[k] = v - elif k in value_attrs: - standard[k] = v - else: - rest[k] = v - - h = standard.get - 
expires = h("expires") - discard = h("discard") - if expires is not None: - expires = iso2time(expires) - if expires is None: - discard = True - domain = h("domain") - domain_specified = domain.startswith(".") - c = Cookie(h("version"), name, value, - h("port"), h("port_spec"), - domain, domain_specified, h("domain_dot"), - h("path"), h("path_spec"), - h("secure"), - expires, - discard, - h("comment"), - h("commenturl"), - rest) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except IOError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Set-Cookie3 format file %r: %r" % - (filename, line)) diff --git a/python/Lib/_MozillaCookieJar.py b/python/Lib/_MozillaCookieJar.py deleted file mode 100755 index 585bc179ee..0000000000 --- a/python/Lib/_MozillaCookieJar.py +++ /dev/null @@ -1,149 +0,0 @@ -"""Mozilla / Netscape cookie loading / saving.""" - -import re, time - -from cookielib import (_warn_unhandled_exception, FileCookieJar, LoadError, - Cookie, MISSING_FILENAME_TEXT) - -class MozillaCookieJar(FileCookieJar): - """ - - WARNING: you may want to backup your browser's cookies file if you use - this class to save cookies. I *think* it works, but there have been - bugs in the past! - - This class differs from CookieJar only in the format it uses to save and - load cookies to and from a file. This class uses the Mozilla/Netscape - `cookies.txt' format. lynx uses this file format, too. - - Don't expect cookies saved while the browser is running to be noticed by - the browser (in fact, Mozilla on unix will overwrite your saved cookies if - you change them on disk while it's running; on Windows, you probably can't - save at all while the browser is running). - - Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to - Netscape cookies on saving. 
- - In particular, the cookie version and port number information is lost, - together with information about whether or not Path, Port and Discard were - specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the - domain as set in the HTTP header started with a dot (yes, I'm aware some - domains in Netscape files start with a dot and some don't -- trust me, you - really don't want to know any more about this). - - Note that though Mozilla and Netscape use the same format, they use - slightly different headers. The class saves cookies using the Netscape - header by default (Mozilla can cope with that). - - """ - magic_re = "#( Netscape)? HTTP Cookie File" - header = """\ -# Netscape HTTP Cookie File -# http://curl.haxx.se/rfc/cookie_spec.html -# This is a generated file! Do not edit. - -""" - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - now = time.time() - - magic = f.readline() - if not re.search(self.magic_re, magic): - f.close() - raise LoadError( - "%r does not look like a Netscape format cookies file" % - filename) - - try: - while 1: - line = f.readline() - if line == "": break - - # last field may be absent, so keep any trailing tab - if line.endswith("\n"): line = line[:-1] - - # skip comments and blank lines XXX what is $ for? - if (line.strip().startswith(("#", "$")) or - line.strip() == ""): - continue - - domain, domain_specified, path, secure, expires, name, value = \ - line.split("\t") - secure = (secure == "TRUE") - domain_specified = (domain_specified == "TRUE") - if name == "": - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas cookielib regards it as a - # cookie with no value. 
- name = value - value = None - - initial_dot = domain.startswith(".") - assert domain_specified == initial_dot - - discard = False - if expires == "": - expires = None - discard = True - - # assume path_specified is false - c = Cookie(0, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - {}) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except IOError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Netscape format cookies file %r: %r" % - (filename, line)) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - f.write(self.header) - now = time.time() - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - if cookie.secure: secure = "TRUE" - else: secure = "FALSE" - if cookie.domain.startswith("."): initial_dot = "TRUE" - else: initial_dot = "FALSE" - if cookie.expires is not None: - expires = str(cookie.expires) - else: - expires = "" - if cookie.value is None: - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas cookielib regards it as a - # cookie with no value. - name = "" - value = cookie.name - else: - name = cookie.name - value = cookie.value - f.write( - "\t".join([cookie.domain, initial_dot, cookie.path, - secure, expires, name, value])+ - "\n") - finally: - f.close() diff --git a/python/Lib/__future__.py b/python/Lib/__future__.py deleted file mode 100755 index e0996eb007..0000000000 --- a/python/Lib/__future__.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Record of phased-in incompatible language changes. 
- -Each line is of the form: - - FeatureName = "_Feature(" OptionalRelease "," MandatoryRelease "," - CompilerFlag ")" - -where, normally, OptionalRelease < MandatoryRelease, and both are 5-tuples -of the same form as sys.version_info: - - (PY_MAJOR_VERSION, # the 2 in 2.1.0a3; an int - PY_MINOR_VERSION, # the 1; an int - PY_MICRO_VERSION, # the 0; an int - PY_RELEASE_LEVEL, # "alpha", "beta", "candidate" or "final"; string - PY_RELEASE_SERIAL # the 3; an int - ) - -OptionalRelease records the first release in which - - from __future__ import FeatureName - -was accepted. - -In the case of MandatoryReleases that have not yet occurred, -MandatoryRelease predicts the release in which the feature will become part -of the language. - -Else MandatoryRelease records when the feature became part of the language; -in releases at or after that, modules no longer need - - from __future__ import FeatureName - -to use the feature in question, but may continue to use such imports. - -MandatoryRelease may also be None, meaning that a planned feature got -dropped. - -Instances of class _Feature have two corresponding methods, -.getOptionalRelease() and .getMandatoryRelease(). - -CompilerFlag is the (bitfield) flag that should be passed in the fourth -argument to the builtin function compile() to enable the feature in -dynamically compiled code. This flag is stored in the .compiler_flag -attribute on _Future instances. These values must match the appropriate -#defines of CO_xxx flags in Include/compile.h. - -No feature line is ever to be deleted from this file. -""" - -all_feature_names = [ - "nested_scopes", - "generators", - "division", - "absolute_import", - "with_statement", - "print_function", - "unicode_literals", -] - -__all__ = ["all_feature_names"] + all_feature_names - -# The CO_xxx symbols are defined here under the same names used by -# compile.h, so that an editor search will find them here. 
However, -# they're not exported in __all__, because they don't really belong to -# this module. -CO_NESTED = 0x0010 # nested_scopes -CO_GENERATOR_ALLOWED = 0 # generators (obsolete, was 0x1000) -CO_FUTURE_DIVISION = 0x2000 # division -CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # perform absolute imports by default -CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement -CO_FUTURE_PRINT_FUNCTION = 0x10000 # print function -CO_FUTURE_UNICODE_LITERALS = 0x20000 # unicode string literals - -class _Feature: - def __init__(self, optionalRelease, mandatoryRelease, compiler_flag): - self.optional = optionalRelease - self.mandatory = mandatoryRelease - self.compiler_flag = compiler_flag - - def getOptionalRelease(self): - """Return first release in which this feature was recognized. - - This is a 5-tuple, of the same form as sys.version_info. - """ - - return self.optional - - def getMandatoryRelease(self): - """Return release in which this feature will become mandatory. - - This is a 5-tuple, of the same form as sys.version_info, or, if - the feature was dropped, is None. 
- """ - - return self.mandatory - - def __repr__(self): - return "_Feature" + repr((self.optional, - self.mandatory, - self.compiler_flag)) - -nested_scopes = _Feature((2, 1, 0, "beta", 1), - (2, 2, 0, "alpha", 0), - CO_NESTED) - -generators = _Feature((2, 2, 0, "alpha", 1), - (2, 3, 0, "final", 0), - CO_GENERATOR_ALLOWED) - -division = _Feature((2, 2, 0, "alpha", 2), - (3, 0, 0, "alpha", 0), - CO_FUTURE_DIVISION) - -absolute_import = _Feature((2, 5, 0, "alpha", 1), - (3, 0, 0, "alpha", 0), - CO_FUTURE_ABSOLUTE_IMPORT) - -with_statement = _Feature((2, 5, 0, "alpha", 1), - (2, 6, 0, "alpha", 0), - CO_FUTURE_WITH_STATEMENT) - -print_function = _Feature((2, 6, 0, "alpha", 2), - (3, 0, 0, "alpha", 0), - CO_FUTURE_PRINT_FUNCTION) - -unicode_literals = _Feature((2, 6, 0, "alpha", 2), - (3, 0, 0, "alpha", 0), - CO_FUTURE_UNICODE_LITERALS) diff --git a/python/Lib/__phello__.foo.py b/python/Lib/__phello__.foo.py deleted file mode 100755 index 8e8623ee1d..0000000000 --- a/python/Lib/__phello__.foo.py +++ /dev/null @@ -1 +0,0 @@ -# This file exists as a helper for the test.test_frozen module. diff --git a/python/Lib/_abcoll.py b/python/Lib/_abcoll.py deleted file mode 100755 index b643692e39..0000000000 --- a/python/Lib/_abcoll.py +++ /dev/null @@ -1,695 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Abstract Base Classes (ABCs) for collections, according to PEP 3119. - -DON'T USE THIS MODULE DIRECTLY! The classes here should be imported -via collections; they are defined here only to alleviate certain -bootstrapping issues. Unit tests are in test_collections. 
-""" - -from abc import ABCMeta, abstractmethod -import sys - -__all__ = ["Hashable", "Iterable", "Iterator", - "Sized", "Container", "Callable", - "Set", "MutableSet", - "Mapping", "MutableMapping", - "MappingView", "KeysView", "ItemsView", "ValuesView", - "Sequence", "MutableSequence", - ] - -### ONE-TRICK PONIES ### - -def _hasattr(C, attr): - try: - return any(attr in B.__dict__ for B in C.__mro__) - except AttributeError: - # Old-style class - return hasattr(C, attr) - - -class Hashable: - __metaclass__ = ABCMeta - - @abstractmethod - def __hash__(self): - return 0 - - @classmethod - def __subclasshook__(cls, C): - if cls is Hashable: - try: - for B in C.__mro__: - if "__hash__" in B.__dict__: - if B.__dict__["__hash__"]: - return True - break - except AttributeError: - # Old-style class - if getattr(C, "__hash__", None): - return True - return NotImplemented - - -class Iterable: - __metaclass__ = ABCMeta - - @abstractmethod - def __iter__(self): - while False: - yield None - - @classmethod - def __subclasshook__(cls, C): - if cls is Iterable: - if _hasattr(C, "__iter__"): - return True - return NotImplemented - -Iterable.register(str) - - -class Iterator(Iterable): - - @abstractmethod - def next(self): - 'Return the next item from the iterator. 
When exhausted, raise StopIteration' - raise StopIteration - - def __iter__(self): - return self - - @classmethod - def __subclasshook__(cls, C): - if cls is Iterator: - if _hasattr(C, "next") and _hasattr(C, "__iter__"): - return True - return NotImplemented - - -class Sized: - __metaclass__ = ABCMeta - - @abstractmethod - def __len__(self): - return 0 - - @classmethod - def __subclasshook__(cls, C): - if cls is Sized: - if _hasattr(C, "__len__"): - return True - return NotImplemented - - -class Container: - __metaclass__ = ABCMeta - - @abstractmethod - def __contains__(self, x): - return False - - @classmethod - def __subclasshook__(cls, C): - if cls is Container: - if _hasattr(C, "__contains__"): - return True - return NotImplemented - - -class Callable: - __metaclass__ = ABCMeta - - @abstractmethod - def __call__(self, *args, **kwds): - return False - - @classmethod - def __subclasshook__(cls, C): - if cls is Callable: - if _hasattr(C, "__call__"): - return True - return NotImplemented - - -### SETS ### - - -class Set(Sized, Iterable, Container): - """A set is a finite, iterable container. - - This class provides concrete generic implementations of all - methods except for __contains__, __iter__ and __len__. - - To override the comparisons (presumably for speed, as the - semantics are fixed), redefine __le__ and __ge__, - then the other operations will automatically follow suit. 
- """ - - def __le__(self, other): - if not isinstance(other, Set): - return NotImplemented - if len(self) > len(other): - return False - for elem in self: - if elem not in other: - return False - return True - - def __lt__(self, other): - if not isinstance(other, Set): - return NotImplemented - return len(self) < len(other) and self.__le__(other) - - def __gt__(self, other): - if not isinstance(other, Set): - return NotImplemented - return len(self) > len(other) and self.__ge__(other) - - def __ge__(self, other): - if not isinstance(other, Set): - return NotImplemented - if len(self) < len(other): - return False - for elem in other: - if elem not in self: - return False - return True - - def __eq__(self, other): - if not isinstance(other, Set): - return NotImplemented - return len(self) == len(other) and self.__le__(other) - - def __ne__(self, other): - return not (self == other) - - @classmethod - def _from_iterable(cls, it): - '''Construct an instance of the class from any iterable input. - - Must override this method if the class constructor signature - does not accept an iterable for an input. - ''' - return cls(it) - - def __and__(self, other): - if not isinstance(other, Iterable): - return NotImplemented - return self._from_iterable(value for value in other if value in self) - - __rand__ = __and__ - - def isdisjoint(self, other): - 'Return True if two sets have a null intersection.' 
- for value in other: - if value in self: - return False - return True - - def __or__(self, other): - if not isinstance(other, Iterable): - return NotImplemented - chain = (e for s in (self, other) for e in s) - return self._from_iterable(chain) - - __ror__ = __or__ - - def __sub__(self, other): - if not isinstance(other, Set): - if not isinstance(other, Iterable): - return NotImplemented - other = self._from_iterable(other) - return self._from_iterable(value for value in self - if value not in other) - - def __rsub__(self, other): - if not isinstance(other, Set): - if not isinstance(other, Iterable): - return NotImplemented - other = self._from_iterable(other) - return self._from_iterable(value for value in other - if value not in self) - - def __xor__(self, other): - if not isinstance(other, Set): - if not isinstance(other, Iterable): - return NotImplemented - other = self._from_iterable(other) - return (self - other) | (other - self) - - __rxor__ = __xor__ - - # Sets are not hashable by default, but subclasses can change this - __hash__ = None - - def _hash(self): - """Compute the hash value of a set. - - Note that we don't define __hash__: not all sets are hashable. - But if you define a hashable set type, its __hash__ should - call this function. - - This must be compatible __eq__. - - All sets ought to compare equal if they contain the same - elements, regardless of how they are implemented, and - regardless of the order of the elements; so there's not much - freedom for __eq__ or __hash__. We match the algorithm used - by the built-in frozenset type. - """ - MAX = sys.maxint - MASK = 2 * MAX + 1 - n = len(self) - h = 1927868237 * (n + 1) - h &= MASK - for x in self: - hx = hash(x) - h ^= (hx ^ (hx << 16) ^ 89869747) * 3644798167 - h &= MASK - h = h * 69069 + 907133923 - h &= MASK - if h > MAX: - h -= MASK + 1 - if h == -1: - h = 590923713 - return h - -Set.register(frozenset) - - -class MutableSet(Set): - """A mutable set is a finite, iterable container. 
- - This class provides concrete generic implementations of all - methods except for __contains__, __iter__, __len__, - add(), and discard(). - - To override the comparisons (presumably for speed, as the - semantics are fixed), all you have to do is redefine __le__ and - then the other operations will automatically follow suit. - """ - - @abstractmethod - def add(self, value): - """Add an element.""" - raise NotImplementedError - - @abstractmethod - def discard(self, value): - """Remove an element. Do not raise an exception if absent.""" - raise NotImplementedError - - def remove(self, value): - """Remove an element. If not a member, raise a KeyError.""" - if value not in self: - raise KeyError(value) - self.discard(value) - - def pop(self): - """Return the popped value. Raise KeyError if empty.""" - it = iter(self) - try: - value = next(it) - except StopIteration: - raise KeyError - self.discard(value) - return value - - def clear(self): - """This is slow (creates N new iterators!) but effective.""" - try: - while True: - self.pop() - except KeyError: - pass - - def __ior__(self, it): - for value in it: - self.add(value) - return self - - def __iand__(self, it): - for value in (self - it): - self.discard(value) - return self - - def __ixor__(self, it): - if it is self: - self.clear() - else: - if not isinstance(it, Set): - it = self._from_iterable(it) - for value in it: - if value in self: - self.discard(value) - else: - self.add(value) - return self - - def __isub__(self, it): - if it is self: - self.clear() - else: - for value in it: - self.discard(value) - return self - -MutableSet.register(set) - - -### MAPPINGS ### - - -class Mapping(Sized, Iterable, Container): - - """A Mapping is a generic container for associating key/value - pairs. - - This class provides concrete generic implementations of all - methods except for __getitem__, __iter__, and __len__. 
- - """ - - @abstractmethod - def __getitem__(self, key): - raise KeyError - - def get(self, key, default=None): - 'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.' - try: - return self[key] - except KeyError: - return default - - def __contains__(self, key): - try: - self[key] - except KeyError: - return False - else: - return True - - def iterkeys(self): - 'D.iterkeys() -> an iterator over the keys of D' - return iter(self) - - def itervalues(self): - 'D.itervalues() -> an iterator over the values of D' - for key in self: - yield self[key] - - def iteritems(self): - 'D.iteritems() -> an iterator over the (key, value) items of D' - for key in self: - yield (key, self[key]) - - def keys(self): - "D.keys() -> list of D's keys" - return list(self) - - def items(self): - "D.items() -> list of D's (key, value) pairs, as 2-tuples" - return [(key, self[key]) for key in self] - - def values(self): - "D.values() -> list of D's values" - return [self[key] for key in self] - - # Mappings are not hashable by default, but subclasses can change this - __hash__ = None - - def __eq__(self, other): - if not isinstance(other, Mapping): - return NotImplemented - return dict(self.items()) == dict(other.items()) - - def __ne__(self, other): - return not (self == other) - -class MappingView(Sized): - - def __init__(self, mapping): - self._mapping = mapping - - def __len__(self): - return len(self._mapping) - - def __repr__(self): - return '{0.__class__.__name__}({0._mapping!r})'.format(self) - - -class KeysView(MappingView, Set): - - @classmethod - def _from_iterable(self, it): - return set(it) - - def __contains__(self, key): - return key in self._mapping - - def __iter__(self): - for key in self._mapping: - yield key - -KeysView.register(type({}.viewkeys())) - -class ItemsView(MappingView, Set): - - @classmethod - def _from_iterable(self, it): - return set(it) - - def __contains__(self, item): - key, value = item - try: - v = self._mapping[key] - except KeyError: - return 
False - else: - return v == value - - def __iter__(self): - for key in self._mapping: - yield (key, self._mapping[key]) - -ItemsView.register(type({}.viewitems())) - -class ValuesView(MappingView): - - def __contains__(self, value): - for key in self._mapping: - if value == self._mapping[key]: - return True - return False - - def __iter__(self): - for key in self._mapping: - yield self._mapping[key] - -ValuesView.register(type({}.viewvalues())) - -class MutableMapping(Mapping): - - """A MutableMapping is a generic container for associating - key/value pairs. - - This class provides concrete generic implementations of all - methods except for __getitem__, __setitem__, __delitem__, - __iter__, and __len__. - - """ - - @abstractmethod - def __setitem__(self, key, value): - raise KeyError - - @abstractmethod - def __delitem__(self, key): - raise KeyError - - __marker = object() - - def pop(self, key, default=__marker): - '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. - ''' - try: - value = self[key] - except KeyError: - if default is self.__marker: - raise - return default - else: - del self[key] - return value - - def popitem(self): - '''D.popitem() -> (k, v), remove and return some (key, value) pair - as a 2-tuple; but raise KeyError if D is empty. - ''' - try: - key = next(iter(self)) - except StopIteration: - raise KeyError - value = self[key] - del self[key] - return key, value - - def clear(self): - 'D.clear() -> None. Remove all items from D.' - try: - while True: - self.popitem() - except KeyError: - pass - - def update(*args, **kwds): - ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. 
- If E present and has a .keys() method, does: for k in E: D[k] = E[k] - If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v - In either case, this is followed by: for k, v in F.items(): D[k] = v - ''' - if not args: - raise TypeError("descriptor 'update' of 'MutableMapping' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('update expected at most 1 arguments, got %d' % - len(args)) - if args: - other = args[0] - if isinstance(other, Mapping): - for key in other: - self[key] = other[key] - elif hasattr(other, "keys"): - for key in other.keys(): - self[key] = other[key] - else: - for key, value in other: - self[key] = value - for key, value in kwds.items(): - self[key] = value - - def setdefault(self, key, default=None): - 'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D' - try: - return self[key] - except KeyError: - self[key] = default - return default - -MutableMapping.register(dict) - - -### SEQUENCES ### - - -class Sequence(Sized, Iterable, Container): - """All the operations on a read-only sequence. - - Concrete subclasses must override __new__ or __init__, - __getitem__, and __len__. - """ - - @abstractmethod - def __getitem__(self, index): - raise IndexError - - def __iter__(self): - i = 0 - try: - while True: - v = self[i] - yield v - i += 1 - except IndexError: - return - - def __contains__(self, value): - for v in self: - if v == value: - return True - return False - - def __reversed__(self): - for i in reversed(range(len(self))): - yield self[i] - - def index(self, value): - '''S.index(value) -> integer -- return first index of value. - Raises ValueError if the value is not present. 
- ''' - for i, v in enumerate(self): - if v == value: - return i - raise ValueError - - def count(self, value): - 'S.count(value) -> integer -- return number of occurrences of value' - return sum(1 for v in self if v == value) - -Sequence.register(tuple) -Sequence.register(basestring) -Sequence.register(buffer) -Sequence.register(xrange) - - -class MutableSequence(Sequence): - - """All the operations on a read-only sequence. - - Concrete subclasses must provide __new__ or __init__, - __getitem__, __setitem__, __delitem__, __len__, and insert(). - - """ - - @abstractmethod - def __setitem__(self, index, value): - raise IndexError - - @abstractmethod - def __delitem__(self, index): - raise IndexError - - @abstractmethod - def insert(self, index, value): - 'S.insert(index, object) -- insert object before index' - raise IndexError - - def append(self, value): - 'S.append(object) -- append object to the end of the sequence' - self.insert(len(self), value) - - def reverse(self): - 'S.reverse() -- reverse *IN PLACE*' - n = len(self) - for i in range(n//2): - self[i], self[n-i-1] = self[n-i-1], self[i] - - def extend(self, values): - 'S.extend(iterable) -- extend sequence by appending elements from the iterable' - for v in values: - self.append(v) - - def pop(self, index=-1): - '''S.pop([index]) -> item -- remove and return item at index (default last). - Raise IndexError if list is empty or index is out of range. - ''' - v = self[index] - del self[index] - return v - - def remove(self, value): - '''S.remove(value) -- remove first occurrence of value. - Raise ValueError if the value is not present. 
- ''' - del self[self.index(value)] - - def __iadd__(self, values): - self.extend(values) - return self - -MutableSequence.register(list) diff --git a/python/Lib/_osx_support.py b/python/Lib/_osx_support.py deleted file mode 100644 index d2aaae7986..0000000000 --- a/python/Lib/_osx_support.py +++ /dev/null @@ -1,502 +0,0 @@ -"""Shared OS X support functions.""" - -import os -import re -import sys - -__all__ = [ - 'compiler_fixup', - 'customize_config_vars', - 'customize_compiler', - 'get_platform_osx', -] - -# configuration variables that may contain universal build flags, -# like "-arch" or "-isdkroot", that may need customization for -# the user environment -_UNIVERSAL_CONFIG_VARS = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS', 'BASECFLAGS', - 'BLDSHARED', 'LDSHARED', 'CC', 'CXX', - 'PY_CFLAGS', 'PY_LDFLAGS', 'PY_CPPFLAGS', - 'PY_CORE_CFLAGS') - -# configuration variables that may contain compiler calls -_COMPILER_CONFIG_VARS = ('BLDSHARED', 'LDSHARED', 'CC', 'CXX') - -# prefix added to original configuration variable names -_INITPRE = '_OSX_SUPPORT_INITIAL_' - - -def _find_executable(executable, path=None): - """Tries to find 'executable' in the directories listed in 'path'. - - A string listing directories separated by 'os.pathsep'; defaults to - os.environ['PATH']. Returns the complete filename or None if not found. 
- """ - if path is None: - path = os.environ['PATH'] - - paths = path.split(os.pathsep) - base, ext = os.path.splitext(executable) - - if (sys.platform == 'win32' or os.name == 'os2') and (ext != '.exe'): - executable = executable + '.exe' - - if not os.path.isfile(executable): - for p in paths: - f = os.path.join(p, executable) - if os.path.isfile(f): - # the file exists, we have a shot at spawn working - return f - return None - else: - return executable - - -def _read_output(commandstring): - """Output from successful command execution or None""" - # Similar to os.popen(commandstring, "r").read(), - # but without actually using os.popen because that - # function is not usable during python bootstrap. - # tempfile is also not available then. - import contextlib - try: - import tempfile - fp = tempfile.NamedTemporaryFile() - except ImportError: - fp = open("/tmp/_osx_support.%s"%( - os.getpid(),), "w+b") - - with contextlib.closing(fp) as fp: - cmd = "%s 2>/dev/null >'%s'" % (commandstring, fp.name) - return fp.read().strip() if not os.system(cmd) else None - - -def _find_build_tool(toolname): - """Find a build tool on current path or using xcrun""" - return (_find_executable(toolname) - or _read_output("/usr/bin/xcrun -find %s" % (toolname,)) - or '' - ) - -_SYSTEM_VERSION = None - -def _get_system_version(): - """Return the OS X system version as a string""" - # Reading this plist is a documented way to get the system - # version (see the documentation for the Gestalt Manager) - # We avoid using platform.mac_ver to avoid possible bootstrap issues during - # the build of Python itself (distutils is used to build standard library - # extensions). - - global _SYSTEM_VERSION - - if _SYSTEM_VERSION is None: - _SYSTEM_VERSION = '' - try: - f = open('/System/Library/CoreServices/SystemVersion.plist') - except IOError: - # We're on a plain darwin box, fall back to the default - # behaviour. 
- pass - else: - try: - m = re.search(r'ProductUserVisibleVersion\s*' - r'(.*?)', f.read()) - finally: - f.close() - if m is not None: - _SYSTEM_VERSION = '.'.join(m.group(1).split('.')[:2]) - # else: fall back to the default behaviour - - return _SYSTEM_VERSION - -def _remove_original_values(_config_vars): - """Remove original unmodified values for testing""" - # This is needed for higher-level cross-platform tests of get_platform. - for k in list(_config_vars): - if k.startswith(_INITPRE): - del _config_vars[k] - -def _save_modified_value(_config_vars, cv, newvalue): - """Save modified and original unmodified value of configuration var""" - - oldvalue = _config_vars.get(cv, '') - if (oldvalue != newvalue) and (_INITPRE + cv not in _config_vars): - _config_vars[_INITPRE + cv] = oldvalue - _config_vars[cv] = newvalue - -def _supports_universal_builds(): - """Returns True if universal builds are supported on this system""" - # As an approximation, we assume that if we are running on 10.4 or above, - # then we are running with an Xcode environment that supports universal - # builds, in particular -isysroot and -arch arguments to the compiler. This - # is in support of allowing 10.4 universal builds to run on 10.3.x systems. - - osx_version = _get_system_version() - if osx_version: - try: - osx_version = tuple(int(i) for i in osx_version.split('.')) - except ValueError: - osx_version = '' - return bool(osx_version >= (10, 4)) if osx_version else False - - -def _find_appropriate_compiler(_config_vars): - """Find appropriate C compiler for extension module builds""" - - # Issue #13590: - # The OSX location for the compiler varies between OSX - # (or rather Xcode) releases. With older releases (up-to 10.5) - # the compiler is in /usr/bin, with newer releases the compiler - # can only be found inside Xcode.app if the "Command Line Tools" - # are not installed. - # - # Furthermore, the compiler that can be used varies between - # Xcode releases. 
Up to Xcode 4 it was possible to use 'gcc-4.2' - # as the compiler, after that 'clang' should be used because - # gcc-4.2 is either not present, or a copy of 'llvm-gcc' that - # miscompiles Python. - - # skip checks if the compiler was overridden with a CC env variable - if 'CC' in os.environ: - return _config_vars - - # The CC config var might contain additional arguments. - # Ignore them while searching. - cc = oldcc = _config_vars['CC'].split()[0] - if not _find_executable(cc): - # Compiler is not found on the shell search PATH. - # Now search for clang, first on PATH (if the Command LIne - # Tools have been installed in / or if the user has provided - # another location via CC). If not found, try using xcrun - # to find an uninstalled clang (within a selected Xcode). - - # NOTE: Cannot use subprocess here because of bootstrap - # issues when building Python itself (and os.popen is - # implemented on top of subprocess and is therefore not - # usable as well) - - cc = _find_build_tool('clang') - - elif os.path.basename(cc).startswith('gcc'): - # Compiler is GCC, check if it is LLVM-GCC - data = _read_output("'%s' --version" - % (cc.replace("'", "'\"'\"'"),)) - if data and 'llvm-gcc' in data: - # Found LLVM-GCC, fall back to clang - cc = _find_build_tool('clang') - - if not cc: - raise SystemError( - "Cannot locate working compiler") - - if cc != oldcc: - # Found a replacement compiler. - # Modify config vars using new compiler, if not already explicitly - # overridden by an env variable, preserving additional arguments. 
- for cv in _COMPILER_CONFIG_VARS: - if cv in _config_vars and cv not in os.environ: - cv_split = _config_vars[cv].split() - cv_split[0] = cc if cv != 'CXX' else cc + '++' - _save_modified_value(_config_vars, cv, ' '.join(cv_split)) - - return _config_vars - - -def _remove_universal_flags(_config_vars): - """Remove all universal build arguments from config vars""" - - for cv in _UNIVERSAL_CONFIG_VARS: - # Do not alter a config var explicitly overridden by env var - if cv in _config_vars and cv not in os.environ: - flags = _config_vars[cv] - flags = re.sub('-arch\s+\w+\s', ' ', flags) - flags = re.sub('-isysroot [^ \t]*', ' ', flags) - _save_modified_value(_config_vars, cv, flags) - - return _config_vars - - -def _remove_unsupported_archs(_config_vars): - """Remove any unsupported archs from config vars""" - # Different Xcode releases support different sets for '-arch' - # flags. In particular, Xcode 4.x no longer supports the - # PPC architectures. - # - # This code automatically removes '-arch ppc' and '-arch ppc64' - # when these are not supported. That makes it possible to - # build extensions on OSX 10.7 and later with the prebuilt - # 32-bit installer on the python.org website. - - # skip checks if the compiler was overridden with a CC env variable - if 'CC' in os.environ: - return _config_vars - - if re.search('-arch\s+ppc', _config_vars['CFLAGS']) is not None: - # NOTE: Cannot use subprocess here because of bootstrap - # issues when building Python itself - status = os.system( - """echo 'int main{};' | """ - """'%s' -c -arch ppc -x c -o /dev/null /dev/null 2>/dev/null""" - %(_config_vars['CC'].replace("'", "'\"'\"'"),)) - if status: - # The compile failed for some reason. Because of differences - # across Xcode and compiler versions, there is no reliable way - # to be sure why it failed. 
Assume here it was due to lack of - # PPC support and remove the related '-arch' flags from each - # config variables not explicitly overridden by an environment - # variable. If the error was for some other reason, we hope the - # failure will show up again when trying to compile an extension - # module. - for cv in _UNIVERSAL_CONFIG_VARS: - if cv in _config_vars and cv not in os.environ: - flags = _config_vars[cv] - flags = re.sub('-arch\s+ppc\w*\s', ' ', flags) - _save_modified_value(_config_vars, cv, flags) - - return _config_vars - - -def _override_all_archs(_config_vars): - """Allow override of all archs with ARCHFLAGS env var""" - # NOTE: This name was introduced by Apple in OSX 10.5 and - # is used by several scripting languages distributed with - # that OS release. - if 'ARCHFLAGS' in os.environ: - arch = os.environ['ARCHFLAGS'] - for cv in _UNIVERSAL_CONFIG_VARS: - if cv in _config_vars and '-arch' in _config_vars[cv]: - flags = _config_vars[cv] - flags = re.sub('-arch\s+\w+\s', ' ', flags) - flags = flags + ' ' + arch - _save_modified_value(_config_vars, cv, flags) - - return _config_vars - - -def _check_for_unavailable_sdk(_config_vars): - """Remove references to any SDKs not available""" - # If we're on OSX 10.5 or later and the user tries to - # compile an extension using an SDK that is not present - # on the current machine it is better to not use an SDK - # than to fail. This is particularly important with - # the standalone Command Line Tools alternative to a - # full-blown Xcode install since the CLT packages do not - # provide SDKs. If the SDK is not present, it is assumed - # that the header files and dev libs have been installed - # to /usr and /System/Library by either a standalone CLT - # package or the CLT component within Xcode. 
- cflags = _config_vars.get('CFLAGS', '') - m = re.search(r'-isysroot\s+(\S+)', cflags) - if m is not None: - sdk = m.group(1) - if not os.path.exists(sdk): - for cv in _UNIVERSAL_CONFIG_VARS: - # Do not alter a config var explicitly overridden by env var - if cv in _config_vars and cv not in os.environ: - flags = _config_vars[cv] - flags = re.sub(r'-isysroot\s+\S+(?:\s|$)', ' ', flags) - _save_modified_value(_config_vars, cv, flags) - - return _config_vars - - -def compiler_fixup(compiler_so, cc_args): - """ - This function will strip '-isysroot PATH' and '-arch ARCH' from the - compile flags if the user has specified one them in extra_compile_flags. - - This is needed because '-arch ARCH' adds another architecture to the - build, without a way to remove an architecture. Furthermore GCC will - barf if multiple '-isysroot' arguments are present. - """ - stripArch = stripSysroot = False - - compiler_so = list(compiler_so) - - if not _supports_universal_builds(): - # OSX before 10.4.0, these don't support -arch and -isysroot at - # all. - stripArch = stripSysroot = True - else: - stripArch = '-arch' in cc_args - stripSysroot = '-isysroot' in cc_args - - if stripArch or 'ARCHFLAGS' in os.environ: - while True: - try: - index = compiler_so.index('-arch') - # Strip this argument and the next one: - del compiler_so[index:index+2] - except ValueError: - break - - if 'ARCHFLAGS' in os.environ and not stripArch: - # User specified different -arch flags in the environ, - # see also distutils.sysconfig - compiler_so = compiler_so + os.environ['ARCHFLAGS'].split() - - if stripSysroot: - while True: - try: - index = compiler_so.index('-isysroot') - # Strip this argument and the next one: - del compiler_so[index:index+2] - except ValueError: - break - - # Check if the SDK that is used during compilation actually exists, - # the universal build requires the usage of a universal SDK and not all - # users have that installed by default. 
- sysroot = None - if '-isysroot' in cc_args: - idx = cc_args.index('-isysroot') - sysroot = cc_args[idx+1] - elif '-isysroot' in compiler_so: - idx = compiler_so.index('-isysroot') - sysroot = compiler_so[idx+1] - - if sysroot and not os.path.isdir(sysroot): - from distutils import log - log.warn("Compiling with an SDK that doesn't seem to exist: %s", - sysroot) - log.warn("Please check your Xcode installation") - - return compiler_so - - -def customize_config_vars(_config_vars): - """Customize Python build configuration variables. - - Called internally from sysconfig with a mutable mapping - containing name/value pairs parsed from the configured - makefile used to build this interpreter. Returns - the mapping updated as needed to reflect the environment - in which the interpreter is running; in the case of - a Python from a binary installer, the installed - environment may be very different from the build - environment, i.e. different OS levels, different - built tools, different available CPU architectures. - - This customization is performed whenever - distutils.sysconfig.get_config_vars() is first - called. It may be used in environments where no - compilers are present, i.e. when installing pure - Python dists. Customization of compiler paths - and detection of unavailable archs is deferred - until the first extension module build is - requested (in distutils.sysconfig.customize_compiler). - - Currently called from distutils.sysconfig - """ - - if not _supports_universal_builds(): - # On Mac OS X before 10.4, check if -arch and -isysroot - # are in CFLAGS or LDFLAGS and remove them if they are. - # This is needed when building extensions on a 10.3 system - # using a universal build of python. 
- _remove_universal_flags(_config_vars) - - # Allow user to override all archs with ARCHFLAGS env var - _override_all_archs(_config_vars) - - # Remove references to sdks that are not found - _check_for_unavailable_sdk(_config_vars) - - return _config_vars - - -def customize_compiler(_config_vars): - """Customize compiler path and configuration variables. - - This customization is performed when the first - extension module build is requested - in distutils.sysconfig.customize_compiler). - """ - - # Find a compiler to use for extension module builds - _find_appropriate_compiler(_config_vars) - - # Remove ppc arch flags if not supported here - _remove_unsupported_archs(_config_vars) - - # Allow user to override all archs with ARCHFLAGS env var - _override_all_archs(_config_vars) - - return _config_vars - - -def get_platform_osx(_config_vars, osname, release, machine): - """Filter values for get_platform()""" - # called from get_platform() in sysconfig and distutils.util - # - # For our purposes, we'll assume that the system version from - # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set - # to. This makes the compatibility story a bit more sane because the - # machine is going to compile and link as if it were - # MACOSX_DEPLOYMENT_TARGET. - - macver = _config_vars.get('MACOSX_DEPLOYMENT_TARGET', '') - macrelease = _get_system_version() or macver - macver = macver or macrelease - - if macver: - release = macver - osname = "macosx" - - # Use the original CFLAGS value, if available, so that we - # return the same machine type for the platform string. - # Otherwise, distutils may consider this a cross-compiling - # case and disallow installs. 
- cflags = _config_vars.get(_INITPRE+'CFLAGS', - _config_vars.get('CFLAGS', '')) - if macrelease: - try: - macrelease = tuple(int(i) for i in macrelease.split('.')[0:2]) - except ValueError: - macrelease = (10, 0) - else: - # assume no universal support - macrelease = (10, 0) - - if (macrelease >= (10, 4)) and '-arch' in cflags.strip(): - # The universal build will build fat binaries, but not on - # systems before 10.4 - - machine = 'fat' - - archs = re.findall('-arch\s+(\S+)', cflags) - archs = tuple(sorted(set(archs))) - - if len(archs) == 1: - machine = archs[0] - elif archs == ('i386', 'ppc'): - machine = 'fat' - elif archs == ('i386', 'x86_64'): - machine = 'intel' - elif archs == ('i386', 'ppc', 'x86_64'): - machine = 'fat3' - elif archs == ('ppc64', 'x86_64'): - machine = 'fat64' - elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'): - machine = 'universal' - else: - raise ValueError( - "Don't know machine value for archs=%r" % (archs,)) - - elif machine == 'i386': - # On OSX the machine type returned by uname is always the - # 32-bit variant, even if the executable architecture is - # the 64-bit variant - if sys.maxint >= 2**32: - machine = 'x86_64' - - elif machine in ('PowerPC', 'Power_Macintosh'): - # Pick a sane name for the PPC architecture. - # See 'i386' case - if sys.maxint >= 2**32: - machine = 'ppc64' - else: - machine = 'ppc' - - return (osname, release, machine) diff --git a/python/Lib/_pyio.py b/python/Lib/_pyio.py deleted file mode 100644 index f022a4e88b..0000000000 --- a/python/Lib/_pyio.py +++ /dev/null @@ -1,2037 +0,0 @@ -""" -Python implementation of the io module. 
-""" - -from __future__ import (print_function, unicode_literals) - -import os -import abc -import codecs -import sys -import warnings -import errno -# Import thread instead of threading to reduce startup cost -try: - from thread import allocate_lock as Lock -except ImportError: - from dummy_thread import allocate_lock as Lock - -import io -from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) -from errno import EINTR - -__metaclass__ = type - -# open() uses st_blksize whenever we can -DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes - -# NOTE: Base classes defined here are registered with the "official" ABCs -# defined in io.py. We don't use real inheritance though, because we don't want -# to inherit the C implementations. - - -class BlockingIOError(IOError): - - """Exception raised when I/O would block on a non-blocking I/O stream.""" - - def __init__(self, errno, strerror, characters_written=0): - super(IOError, self).__init__(errno, strerror) - if not isinstance(characters_written, (int, long)): - raise TypeError("characters_written must be a integer") - self.characters_written = characters_written - - -def open(file, mode="r", buffering=-1, - encoding=None, errors=None, - newline=None, closefd=True): - - r"""Open file and return a stream. Raise IOError upon failure. - - file is either a text or byte string giving the name (and the path - if the file isn't in the current working directory) of the file to - be opened or an integer file descriptor of the file to be - wrapped. (If a file descriptor is given, it is closed when the - returned I/O object is closed, unless closefd is set to False.) - - mode is an optional string that specifies the mode in which the file - is opened. It defaults to 'r' which means open for reading in text - mode. Other common values are 'w' for writing (truncating the file if - it already exists), and 'a' for appending (which on some Unix systems, - means that all writes append to the end of the file regardless of the - current seek position). 
In text mode, if encoding is not specified the - encoding used is platform dependent. (For reading and writing raw - bytes use binary mode and leave encoding unspecified.) The available - modes are: - - ========= =============================================================== - Character Meaning - --------- --------------------------------------------------------------- - 'r' open for reading (default) - 'w' open for writing, truncating the file first - 'a' open for writing, appending to the end of the file if it exists - 'b' binary mode - 't' text mode (default) - '+' open a disk file for updating (reading and writing) - 'U' universal newline mode (for backwards compatibility; unneeded - for new code) - ========= =============================================================== - - The default mode is 'rt' (open for reading text). For binary random - access, the mode 'w+b' opens and truncates the file to 0 bytes, while - 'r+b' opens the file without truncation. - - Python distinguishes between files opened in binary and text modes, - even when the underlying operating system doesn't. Files opened in - binary mode (appending 'b' to the mode argument) return contents as - bytes objects without any decoding. In text mode (the default, or when - 't' is appended to the mode argument), the contents of the file are - returned as strings, the bytes having been first decoded using a - platform-dependent encoding or using the specified encoding if given. - - buffering is an optional integer used to set the buffering policy. - Pass 0 to switch buffering off (only allowed in binary mode), 1 to select - line buffering (only usable in text mode), and an integer > 1 to indicate - the size of a fixed-size chunk buffer. 
When no buffering argument is - given, the default buffering policy works as follows: - - * Binary files are buffered in fixed-size chunks; the size of the buffer - is chosen using a heuristic trying to determine the underlying device's - "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. - On many systems, the buffer will typically be 4096 or 8192 bytes long. - - * "Interactive" text files (files for which isatty() returns True) - use line buffering. Other text files use the policy described above - for binary files. - - encoding is the name of the encoding used to decode or encode the - file. This should only be used in text mode. The default encoding is - platform dependent, but any encoding supported by Python can be - passed. See the codecs module for the list of supported encodings. - - errors is an optional string that specifies how encoding errors are to - be handled---this argument should not be used in binary mode. Pass - 'strict' to raise a ValueError exception if there is an encoding error - (the default of None has the same effect), or pass 'ignore' to ignore - errors. (Note that ignoring encoding errors can lead to data loss.) - See the documentation for codecs.register for a list of the permitted - encoding error strings. - - newline controls how universal newlines works (it only applies to text - mode). It can be None, '', '\n', '\r', and '\r\n'. It works as - follows: - - * On input, if newline is None, universal newlines mode is - enabled. Lines in the input can end in '\n', '\r', or '\r\n', and - these are translated into '\n' before being returned to the - caller. If it is '', universal newline mode is enabled, but line - endings are returned to the caller untranslated. If it has any of - the other legal values, input lines are only terminated by the given - string, and the line ending is returned to the caller untranslated. 
- - * On output, if newline is None, any '\n' characters written are - translated to the system default line separator, os.linesep. If - newline is '', no translation takes place. If newline is any of the - other legal values, any '\n' characters written are translated to - the given string. - - If closefd is False, the underlying file descriptor will be kept open - when the file is closed. This does not work when a file name is given - and must be True in that case. - - open() returns a file object whose type depends on the mode, and - through which the standard file operations such as reading and writing - are performed. When open() is used to open a file in a text mode ('w', - 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open - a file in a binary mode, the returned class varies: in read binary - mode, it returns a BufferedReader; in write binary and append binary - modes, it returns a BufferedWriter, and in read/write mode, it returns - a BufferedRandom. - - It is also possible to use a string or bytearray as a file for both - reading and writing. For strings StringIO can be used like a file - opened in a text mode, and for bytes a BytesIO can be used like a file - opened in a binary mode. 
- """ - if not isinstance(file, (basestring, int, long)): - raise TypeError("invalid file: %r" % file) - if not isinstance(mode, basestring): - raise TypeError("invalid mode: %r" % mode) - if not isinstance(buffering, (int, long)): - raise TypeError("invalid buffering: %r" % buffering) - if encoding is not None and not isinstance(encoding, basestring): - raise TypeError("invalid encoding: %r" % encoding) - if errors is not None and not isinstance(errors, basestring): - raise TypeError("invalid errors: %r" % errors) - modes = set(mode) - if modes - set("arwb+tU") or len(mode) > len(modes): - raise ValueError("invalid mode: %r" % mode) - reading = "r" in modes - writing = "w" in modes - appending = "a" in modes - updating = "+" in modes - text = "t" in modes - binary = "b" in modes - if "U" in modes: - if writing or appending: - raise ValueError("can't use U and writing mode at once") - reading = True - if text and binary: - raise ValueError("can't have text and binary mode at once") - if reading + writing + appending > 1: - raise ValueError("can't have read/write/append mode at once") - if not (reading or writing or appending): - raise ValueError("must have exactly one of read/write/append mode") - if binary and encoding is not None: - raise ValueError("binary mode doesn't take an encoding argument") - if binary and errors is not None: - raise ValueError("binary mode doesn't take an errors argument") - if binary and newline is not None: - raise ValueError("binary mode doesn't take a newline argument") - raw = FileIO(file, - (reading and "r" or "") + - (writing and "w" or "") + - (appending and "a" or "") + - (updating and "+" or ""), - closefd) - result = raw - try: - line_buffering = False - if buffering == 1 or buffering < 0 and raw.isatty(): - buffering = -1 - line_buffering = True - if buffering < 0: - buffering = DEFAULT_BUFFER_SIZE - try: - bs = os.fstat(raw.fileno()).st_blksize - except (os.error, AttributeError): - pass - else: - if bs > 1: - buffering = bs 
- if buffering < 0: - raise ValueError("invalid buffering size") - if buffering == 0: - if binary: - return result - raise ValueError("can't have unbuffered text I/O") - if updating: - buffer = BufferedRandom(raw, buffering) - elif writing or appending: - buffer = BufferedWriter(raw, buffering) - elif reading: - buffer = BufferedReader(raw, buffering) - else: - raise ValueError("unknown mode: %r" % mode) - result = buffer - if binary: - return result - text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) - result = text - text.mode = mode - return result - except: - result.close() - raise - - -class DocDescriptor: - """Helper for builtins.open.__doc__ - """ - def __get__(self, obj, typ): - return ( - "open(file, mode='r', buffering=-1, encoding=None, " - "errors=None, newline=None, closefd=True)\n\n" + - open.__doc__) - -class OpenWrapper: - """Wrapper for builtins.open - - Trick so that open won't become a bound method when stored - as a class variable (as dbm.dumb does). - - See initstdio() in Python/pythonrun.c. - """ - __doc__ = DocDescriptor() - - def __new__(cls, *args, **kwargs): - return open(*args, **kwargs) - - -class UnsupportedOperation(ValueError, IOError): - pass - - -class IOBase: - __metaclass__ = abc.ABCMeta - - """The abstract base class for all I/O classes, acting on streams of - bytes. There is no public constructor. - - This class provides dummy implementations for many methods that - derived classes can override selectively; the default implementations - represent a file that cannot be read, written or seeked. - - Even though IOBase does not declare read, readinto, or write because - their signatures will vary, implementations and clients should - consider those methods part of the interface. Also, implementations - may raise an IOError when operations they do not support are called. - - The basic type used for binary data read from or written to a file is - the bytes type. 
Method arguments may also be bytearray or memoryview of - arrays of bytes. In some cases, such as readinto, a writable object such - as bytearray is required. Text I/O classes work with unicode data. - - Note that calling any method (even inquiries) on a closed stream is - undefined. Implementations may raise IOError in this case. - - IOBase (and its subclasses) support the iterator protocol, meaning - that an IOBase object can be iterated over yielding the lines in a - stream. - - IOBase also supports the :keyword:`with` statement. In this example, - fp is closed after the suite of the with statement is complete: - - with open('spam.txt', 'r') as fp: - fp.write('Spam and eggs!') - """ - - ### Internal ### - - def _unsupported(self, name): - """Internal: raise an exception for unsupported operations.""" - raise UnsupportedOperation("%s.%s() not supported" % - (self.__class__.__name__, name)) - - ### Positioning ### - - def seek(self, pos, whence=0): - """Change stream position. - - Change the stream position to byte offset pos. Argument pos is - interpreted relative to the position indicated by whence. Values - for whence are: - - * 0 -- start of stream (the default); offset should be zero or positive - * 1 -- current stream position; offset may be negative - * 2 -- end of stream; offset is usually negative - - Return the new absolute position. - """ - self._unsupported("seek") - - def tell(self): - """Return current stream position.""" - return self.seek(0, 1) - - def truncate(self, pos=None): - """Truncate file to size bytes. - - Size defaults to the current IO position as reported by tell(). Return - the new size. - """ - self._unsupported("truncate") - - ### Flush and close ### - - def flush(self): - """Flush write buffers, if applicable. - - This is not implemented for read-only and non-blocking streams. - """ - self._checkClosed() - # XXX Should this return the number of bytes written??? 
- - __closed = False - - def close(self): - """Flush and close the IO object. - - This method has no effect if the file is already closed. - """ - if not self.__closed: - try: - self.flush() - finally: - self.__closed = True - - def __del__(self): - """Destructor. Calls close().""" - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass - - ### Inquiries ### - - def seekable(self): - """Return whether object supports random access. - - If False, seek(), tell() and truncate() will raise IOError. - This method may need to do a test seek(). - """ - return False - - def _checkSeekable(self, msg=None): - """Internal: raise an IOError if file is not seekable - """ - if not self.seekable(): - raise IOError("File or stream is not seekable." - if msg is None else msg) - - - def readable(self): - """Return whether object was opened for reading. - - If False, read() will raise IOError. - """ - return False - - def _checkReadable(self, msg=None): - """Internal: raise an IOError if file is not readable - """ - if not self.readable(): - raise IOError("File or stream is not readable." - if msg is None else msg) - - def writable(self): - """Return whether object was opened for writing. - - If False, write() and truncate() will raise IOError. - """ - return False - - def _checkWritable(self, msg=None): - """Internal: raise an IOError if file is not writable - """ - if not self.writable(): - raise IOError("File or stream is not writable." - if msg is None else msg) - - @property - def closed(self): - """closed: bool. True iff the file has been closed. - - For backwards compatibility, this is a property, not a predicate. 
- """ - return self.__closed - - def _checkClosed(self, msg=None): - """Internal: raise a ValueError if file is closed - """ - if self.closed: - raise ValueError("I/O operation on closed file." - if msg is None else msg) - - ### Context manager ### - - def __enter__(self): - """Context management protocol. Returns self.""" - self._checkClosed() - return self - - def __exit__(self, *args): - """Context management protocol. Calls close()""" - self.close() - - ### Lower-level APIs ### - - # XXX Should these be present even if unimplemented? - - def fileno(self): - """Returns underlying file descriptor if one exists. - - An IOError is raised if the IO object does not use a file descriptor. - """ - self._unsupported("fileno") - - def isatty(self): - """Return whether this is an 'interactive' stream. - - Return False if it can't be determined. - """ - self._checkClosed() - return False - - ### Readline[s] and writelines ### - - def readline(self, limit=-1): - r"""Read and return a line from the stream. - - If limit is specified, at most limit bytes will be read. - - The line terminator is always b'\n' for binary files; for text - files, the newlines argument to open can be used to select the line - terminator(s) recognized. - """ - # For backwards compatibility, a (slowish) readline(). 
- if hasattr(self, "peek"): - def nreadahead(): - readahead = self.peek(1) - if not readahead: - return 1 - n = (readahead.find(b"\n") + 1) or len(readahead) - if limit >= 0: - n = min(n, limit) - return n - else: - def nreadahead(): - return 1 - if limit is None: - limit = -1 - elif not isinstance(limit, (int, long)): - raise TypeError("limit must be an integer") - res = bytearray() - while limit < 0 or len(res) < limit: - b = self.read(nreadahead()) - if not b: - break - res += b - if res.endswith(b"\n"): - break - return bytes(res) - - def __iter__(self): - self._checkClosed() - return self - - def next(self): - line = self.readline() - if not line: - raise StopIteration - return line - - def readlines(self, hint=None): - """Return a list of lines from the stream. - - hint can be specified to control the number of lines read: no more - lines will be read if the total size (in bytes/characters) of all - lines so far exceeds hint. - """ - if hint is not None and not isinstance(hint, (int, long)): - raise TypeError("integer or None expected") - if hint is None or hint <= 0: - return list(self) - n = 0 - lines = [] - for line in self: - lines.append(line) - n += len(line) - if n >= hint: - break - return lines - - def writelines(self, lines): - self._checkClosed() - for line in lines: - self.write(line) - -io.IOBase.register(IOBase) - - -class RawIOBase(IOBase): - - """Base class for raw binary I/O.""" - - # The read() method is implemented by calling readinto(); derived - # classes that want to support read() only need to implement - # readinto() as a primitive operation. In general, readinto() can be - # more efficient than read(). - - # (It would be tempting to also provide an implementation of - # readinto() in terms of read(), in case the latter is a more suitable - # primitive operation, but that would lead to nasty recursion in case - # a subclass doesn't implement either.) - - def read(self, n=-1): - """Read and return up to n bytes. 
- - Returns an empty bytes object on EOF, or None if the object is - set not to block and has no data to read. - """ - if n is None: - n = -1 - if n < 0: - return self.readall() - b = bytearray(n.__index__()) - n = self.readinto(b) - if n is None: - return None - del b[n:] - return bytes(b) - - def readall(self): - """Read until EOF, using multiple read() call.""" - res = bytearray() - while True: - data = self.read(DEFAULT_BUFFER_SIZE) - if not data: - break - res += data - if res: - return bytes(res) - else: - # b'' or None - return data - - def readinto(self, b): - """Read up to len(b) bytes into b. - - Returns number of bytes read (0 for EOF), or None if the object - is set not to block and has no data to read. - """ - self._unsupported("readinto") - - def write(self, b): - """Write the given buffer to the IO stream. - - Returns the number of bytes written, which may be less than len(b). - """ - self._unsupported("write") - -io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) - - -class BufferedIOBase(IOBase): - - """Base class for buffered IO objects. - - The main difference with RawIOBase is that the read() method - supports omitting the size argument, and does not have a default - implementation that defers to readinto(). - - In addition, read(), readinto() and write() may raise - BlockingIOError if the underlying raw stream is in non-blocking - mode and not ready; unlike their raw counterparts, they will never - return None. - - A typical implementation should not inherit from a RawIOBase - implementation, but wrap one. - """ - - def read(self, n=None): - """Read and return up to n bytes. - - If the argument is omitted, None, or negative, reads and - returns all data until EOF. - - If the argument is positive, and the underlying raw stream is - not 'interactive', multiple raw reads may be issued to satisfy - the byte count (unless EOF is reached first). 
But for - interactive raw streams (XXX and for pipes?), at most one raw - read will be issued, and a short result does not imply that - EOF is imminent. - - Returns an empty bytes array on EOF. - - Raises BlockingIOError if the underlying raw stream has no - data at the moment. - """ - self._unsupported("read") - - def read1(self, n=None): - """Read up to n bytes with at most one read() system call.""" - self._unsupported("read1") - - def readinto(self, b): - """Read up to len(b) bytes into b. - - Like read(), this may issue multiple reads to the underlying raw - stream, unless the latter is 'interactive'. - - Returns the number of bytes read (0 for EOF). - - Raises BlockingIOError if the underlying raw stream has no - data at the moment. - """ - data = self.read(len(b)) - n = len(data) - try: - b[:n] = data - except TypeError as err: - import array - if not isinstance(b, array.array): - raise err - b[:n] = array.array(b'b', data) - return n - - def write(self, b): - """Write the given buffer to the IO stream. - - Return the number of bytes written, which is always len(b). - - Raises BlockingIOError if the buffer is full and the - underlying raw stream cannot accept more data at the moment. - """ - self._unsupported("write") - - def detach(self): - """ - Separate the underlying raw stream from the buffer and return it. - - After the raw stream has been detached, the buffer is in an unusable - state. - """ - self._unsupported("detach") - -io.BufferedIOBase.register(BufferedIOBase) - - -class _BufferedIOMixin(BufferedIOBase): - - """A mixin implementation of BufferedIOBase with an underlying raw stream. - - This passes most requests on to the underlying raw stream. It - does *not* provide implementations of read(), readinto() or - write(). 
- """ - - def __init__(self, raw): - self._raw = raw - - ### Positioning ### - - def seek(self, pos, whence=0): - new_position = self.raw.seek(pos, whence) - if new_position < 0: - raise IOError("seek() returned an invalid position") - return new_position - - def tell(self): - pos = self.raw.tell() - if pos < 0: - raise IOError("tell() returned an invalid position") - return pos - - def truncate(self, pos=None): - # Flush the stream. We're mixing buffered I/O with lower-level I/O, - # and a flush may be necessary to synch both views of the current - # file state. - self.flush() - - if pos is None: - pos = self.tell() - # XXX: Should seek() be used, instead of passing the position - # XXX directly to truncate? - return self.raw.truncate(pos) - - ### Flush and close ### - - def flush(self): - if self.closed: - raise ValueError("flush of closed file") - self.raw.flush() - - def close(self): - if self.raw is not None and not self.closed: - try: - # may raise BlockingIOError or BrokenPipeError etc - self.flush() - finally: - self.raw.close() - - def detach(self): - if self.raw is None: - raise ValueError("raw stream already detached") - self.flush() - raw = self._raw - self._raw = None - return raw - - ### Inquiries ### - - def seekable(self): - return self.raw.seekable() - - def readable(self): - return self.raw.readable() - - def writable(self): - return self.raw.writable() - - @property - def raw(self): - return self._raw - - @property - def closed(self): - return self.raw.closed - - @property - def name(self): - return self.raw.name - - @property - def mode(self): - return self.raw.mode - - def __repr__(self): - clsname = self.__class__.__name__ - try: - name = self.name - except Exception: - return "<_pyio.{0}>".format(clsname) - else: - return "<_pyio.{0} name={1!r}>".format(clsname, name) - - ### Lower-level APIs ### - - def fileno(self): - return self.raw.fileno() - - def isatty(self): - return self.raw.isatty() - - -class BytesIO(BufferedIOBase): - - 
"""Buffered I/O implementation using an in-memory bytes buffer.""" - - def __init__(self, initial_bytes=None): - buf = bytearray() - if initial_bytes is not None: - buf.extend(initial_bytes) - self._buffer = buf - self._pos = 0 - - def __getstate__(self): - if self.closed: - raise ValueError("__getstate__ on closed file") - return self.__dict__.copy() - - def getvalue(self): - """Return the bytes value (contents) of the buffer - """ - if self.closed: - raise ValueError("getvalue on closed file") - return bytes(self._buffer) - - def read(self, n=None): - if self.closed: - raise ValueError("read from closed file") - if n is None: - n = -1 - if not isinstance(n, (int, long)): - raise TypeError("integer argument expected, got {0!r}".format( - type(n))) - if n < 0: - n = len(self._buffer) - if len(self._buffer) <= self._pos: - return b"" - newpos = min(len(self._buffer), self._pos + n) - b = self._buffer[self._pos : newpos] - self._pos = newpos - return bytes(b) - - def read1(self, n): - """This is the same as read. - """ - return self.read(n) - - def write(self, b): - if self.closed: - raise ValueError("write to closed file") - if isinstance(b, unicode): - raise TypeError("can't write unicode to binary stream") - n = len(b) - if n == 0: - return 0 - pos = self._pos - if pos > len(self._buffer): - # Inserts null bytes between the current end of the file - # and the new write position. 
- padding = b'\x00' * (pos - len(self._buffer)) - self._buffer += padding - self._buffer[pos:pos + n] = b - self._pos += n - return n - - def seek(self, pos, whence=0): - if self.closed: - raise ValueError("seek on closed file") - try: - pos.__index__ - except AttributeError: - raise TypeError("an integer is required") - if whence == 0: - if pos < 0: - raise ValueError("negative seek position %r" % (pos,)) - self._pos = pos - elif whence == 1: - self._pos = max(0, self._pos + pos) - elif whence == 2: - self._pos = max(0, len(self._buffer) + pos) - else: - raise ValueError("invalid whence value") - return self._pos - - def tell(self): - if self.closed: - raise ValueError("tell on closed file") - return self._pos - - def truncate(self, pos=None): - if self.closed: - raise ValueError("truncate on closed file") - if pos is None: - pos = self._pos - else: - try: - pos.__index__ - except AttributeError: - raise TypeError("an integer is required") - if pos < 0: - raise ValueError("negative truncate position %r" % (pos,)) - del self._buffer[pos:] - return pos - - def readable(self): - if self.closed: - raise ValueError("I/O operation on closed file.") - return True - - def writable(self): - if self.closed: - raise ValueError("I/O operation on closed file.") - return True - - def seekable(self): - if self.closed: - raise ValueError("I/O operation on closed file.") - return True - - -class BufferedReader(_BufferedIOMixin): - - """BufferedReader(raw[, buffer_size]) - - A buffer for a readable, sequential BaseRawIO object. - - The constructor creates a BufferedReader for the given readable raw - stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE - is used. - """ - - def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): - """Create a new buffered reader using the given readable raw IO object. 
- """ - if not raw.readable(): - raise IOError('"raw" argument must be readable.') - - _BufferedIOMixin.__init__(self, raw) - if buffer_size <= 0: - raise ValueError("invalid buffer size") - self.buffer_size = buffer_size - self._reset_read_buf() - self._read_lock = Lock() - - def _reset_read_buf(self): - self._read_buf = b"" - self._read_pos = 0 - - def read(self, n=None): - """Read n bytes. - - Returns exactly n bytes of data unless the underlying raw IO - stream reaches EOF or if the call would block in non-blocking - mode. If n is negative, read until EOF or until read() would - block. - """ - if n is not None and n < -1: - raise ValueError("invalid number of bytes to read") - with self._read_lock: - return self._read_unlocked(n) - - def _read_unlocked(self, n=None): - nodata_val = b"" - empty_values = (b"", None) - buf = self._read_buf - pos = self._read_pos - - # Special case for when the number of bytes to read is unspecified. - if n is None or n == -1: - self._reset_read_buf() - chunks = [buf[pos:]] # Strip the consumed bytes. - current_size = 0 - while True: - # Read until EOF or until read() would block. - try: - chunk = self.raw.read() - except IOError as e: - if e.errno != EINTR: - raise - continue - if chunk in empty_values: - nodata_val = chunk - break - current_size += len(chunk) - chunks.append(chunk) - return b"".join(chunks) or nodata_val - - # The number of bytes to read is specified, return at most n bytes. - avail = len(buf) - pos # Length of the available buffered data. - if n <= avail: - # Fast path: the data to read is fully buffered. - self._read_pos += n - return buf[pos:pos+n] - # Slow path: read from the stream until enough bytes are read, - # or until an EOF occurs or until read() would block. 
- chunks = [buf[pos:]] - wanted = max(self.buffer_size, n) - while avail < n: - try: - chunk = self.raw.read(wanted) - except IOError as e: - if e.errno != EINTR: - raise - continue - if chunk in empty_values: - nodata_val = chunk - break - avail += len(chunk) - chunks.append(chunk) - # n is more than avail only when an EOF occurred or when - # read() would have blocked. - n = min(n, avail) - out = b"".join(chunks) - self._read_buf = out[n:] # Save the extra data in the buffer. - self._read_pos = 0 - return out[:n] if out else nodata_val - - def peek(self, n=0): - """Returns buffered bytes without advancing the position. - - The argument indicates a desired minimal number of bytes; we - do at most one raw read to satisfy it. We never return more - than self.buffer_size. - """ - with self._read_lock: - return self._peek_unlocked(n) - - def _peek_unlocked(self, n=0): - want = min(n, self.buffer_size) - have = len(self._read_buf) - self._read_pos - if have < want or have <= 0: - to_read = self.buffer_size - have - while True: - try: - current = self.raw.read(to_read) - except IOError as e: - if e.errno != EINTR: - raise - continue - break - if current: - self._read_buf = self._read_buf[self._read_pos:] + current - self._read_pos = 0 - return self._read_buf[self._read_pos:] - - def read1(self, n): - """Reads up to n bytes, with at most one read() system call.""" - # Returns up to n bytes. If at least one byte is buffered, we - # only return buffered bytes. Otherwise, we do one raw read. 
- if n < 0: - raise ValueError("number of bytes to read must be positive") - if n == 0: - return b"" - with self._read_lock: - self._peek_unlocked(1) - return self._read_unlocked( - min(n, len(self._read_buf) - self._read_pos)) - - def tell(self): - return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos - - def seek(self, pos, whence=0): - if not (0 <= whence <= 2): - raise ValueError("invalid whence value") - with self._read_lock: - if whence == 1: - pos -= len(self._read_buf) - self._read_pos - pos = _BufferedIOMixin.seek(self, pos, whence) - self._reset_read_buf() - return pos - -class BufferedWriter(_BufferedIOMixin): - - """A buffer for a writeable sequential RawIO object. - - The constructor creates a BufferedWriter for the given writeable raw - stream. If the buffer_size is not given, it defaults to - DEFAULT_BUFFER_SIZE. - """ - - _warning_stack_offset = 2 - - def __init__(self, raw, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - if not raw.writable(): - raise IOError('"raw" argument must be writable.') - - _BufferedIOMixin.__init__(self, raw) - if buffer_size <= 0: - raise ValueError("invalid buffer size") - if max_buffer_size is not None: - warnings.warn("max_buffer_size is deprecated", DeprecationWarning, - self._warning_stack_offset) - self.buffer_size = buffer_size - self._write_buf = bytearray() - self._write_lock = Lock() - - def write(self, b): - if self.closed: - raise ValueError("write to closed file") - if isinstance(b, unicode): - raise TypeError("can't write unicode to binary stream") - with self._write_lock: - # XXX we can implement some more tricks to try and avoid - # partial writes - if len(self._write_buf) > self.buffer_size: - # We're full, so let's pre-flush the buffer. (This may - # raise BlockingIOError with characters_written == 0.) 
- self._flush_unlocked() - before = len(self._write_buf) - self._write_buf.extend(b) - written = len(self._write_buf) - before - if len(self._write_buf) > self.buffer_size: - try: - self._flush_unlocked() - except BlockingIOError as e: - if len(self._write_buf) > self.buffer_size: - # We've hit the buffer_size. We have to accept a partial - # write and cut back our buffer. - overage = len(self._write_buf) - self.buffer_size - written -= overage - self._write_buf = self._write_buf[:self.buffer_size] - raise BlockingIOError(e.errno, e.strerror, written) - return written - - def truncate(self, pos=None): - with self._write_lock: - self._flush_unlocked() - if pos is None: - pos = self.raw.tell() - return self.raw.truncate(pos) - - def flush(self): - with self._write_lock: - self._flush_unlocked() - - def _flush_unlocked(self): - if self.closed: - raise ValueError("flush of closed file") - while self._write_buf: - try: - n = self.raw.write(self._write_buf) - except BlockingIOError: - raise RuntimeError("self.raw should implement RawIOBase: it " - "should not raise BlockingIOError") - except IOError as e: - if e.errno != EINTR: - raise - continue - if n is None: - raise BlockingIOError( - errno.EAGAIN, - "write could not complete without blocking", 0) - if n > len(self._write_buf) or n < 0: - raise IOError("write() returned incorrect number of bytes") - del self._write_buf[:n] - - def tell(self): - return _BufferedIOMixin.tell(self) + len(self._write_buf) - - def seek(self, pos, whence=0): - if not (0 <= whence <= 2): - raise ValueError("invalid whence") - with self._write_lock: - self._flush_unlocked() - return _BufferedIOMixin.seek(self, pos, whence) - - -class BufferedRWPair(BufferedIOBase): - - """A buffered reader and writer object together. - - A buffered reader object and buffered writer object put together to - form a sequential IO object that can read and write. This is typically - used with a socket or two-way pipe. 
- - reader and writer are RawIOBase objects that are readable and - writeable respectively. If the buffer_size is omitted it defaults to - DEFAULT_BUFFER_SIZE. - """ - - # XXX The usefulness of this (compared to having two separate IO - # objects) is questionable. - - def __init__(self, reader, writer, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - """Constructor. - - The arguments are two RawIO instances. - """ - if max_buffer_size is not None: - warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2) - - if not reader.readable(): - raise IOError('"reader" argument must be readable.') - - if not writer.writable(): - raise IOError('"writer" argument must be writable.') - - self.reader = BufferedReader(reader, buffer_size) - self.writer = BufferedWriter(writer, buffer_size) - - def read(self, n=None): - if n is None: - n = -1 - return self.reader.read(n) - - def readinto(self, b): - return self.reader.readinto(b) - - def write(self, b): - return self.writer.write(b) - - def peek(self, n=0): - return self.reader.peek(n) - - def read1(self, n): - return self.reader.read1(n) - - def readable(self): - return self.reader.readable() - - def writable(self): - return self.writer.writable() - - def flush(self): - return self.writer.flush() - - def close(self): - try: - self.writer.close() - finally: - self.reader.close() - - def isatty(self): - return self.reader.isatty() or self.writer.isatty() - - @property - def closed(self): - return self.writer.closed - - -class BufferedRandom(BufferedWriter, BufferedReader): - - """A buffered interface to random access streams. - - The constructor creates a reader and writer for a seekable stream, - raw, given in the first argument. If the buffer_size is omitted it - defaults to DEFAULT_BUFFER_SIZE. 
- """ - - _warning_stack_offset = 3 - - def __init__(self, raw, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - raw._checkSeekable() - BufferedReader.__init__(self, raw, buffer_size) - BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size) - - def seek(self, pos, whence=0): - if not (0 <= whence <= 2): - raise ValueError("invalid whence") - self.flush() - if self._read_buf: - # Undo read ahead. - with self._read_lock: - self.raw.seek(self._read_pos - len(self._read_buf), 1) - # First do the raw seek, then empty the read buffer, so that - # if the raw seek fails, we don't lose buffered data forever. - pos = self.raw.seek(pos, whence) - with self._read_lock: - self._reset_read_buf() - if pos < 0: - raise IOError("seek() returned invalid position") - return pos - - def tell(self): - if self._write_buf: - return BufferedWriter.tell(self) - else: - return BufferedReader.tell(self) - - def truncate(self, pos=None): - if pos is None: - pos = self.tell() - # Use seek to flush the read buffer. - return BufferedWriter.truncate(self, pos) - - def read(self, n=None): - if n is None: - n = -1 - self.flush() - return BufferedReader.read(self, n) - - def readinto(self, b): - self.flush() - return BufferedReader.readinto(self, b) - - def peek(self, n=0): - self.flush() - return BufferedReader.peek(self, n) - - def read1(self, n): - self.flush() - return BufferedReader.read1(self, n) - - def write(self, b): - if self._read_buf: - # Undo readahead - with self._read_lock: - self.raw.seek(self._read_pos - len(self._read_buf), 1) - self._reset_read_buf() - return BufferedWriter.write(self, b) - - -class TextIOBase(IOBase): - - """Base class for text I/O. - - This class provides a character and line based interface to stream - I/O. There is no readinto method because Python's character strings - are immutable. There is no public constructor. - """ - - def read(self, n=-1): - """Read at most n characters from stream. 
- - Read from underlying buffer until we have n characters or we hit EOF. - If n is negative or omitted, read until EOF. - """ - self._unsupported("read") - - def write(self, s): - """Write string s to stream.""" - self._unsupported("write") - - def truncate(self, pos=None): - """Truncate size to pos.""" - self._unsupported("truncate") - - def readline(self): - """Read until newline or EOF. - - Returns an empty string if EOF is hit immediately. - """ - self._unsupported("readline") - - def detach(self): - """ - Separate the underlying buffer from the TextIOBase and return it. - - After the underlying buffer has been detached, the TextIO is in an - unusable state. - """ - self._unsupported("detach") - - @property - def encoding(self): - """Subclasses should override.""" - return None - - @property - def newlines(self): - """Line endings translated so far. - - Only line endings translated during reading are considered. - - Subclasses should override. - """ - return None - - @property - def errors(self): - """Error setting of the decoder or encoder. - - Subclasses should override.""" - return None - -io.TextIOBase.register(TextIOBase) - - -class IncrementalNewlineDecoder(codecs.IncrementalDecoder): - r"""Codec used when reading a file in universal newlines mode. It wraps - another incremental decoder, translating \r\n and \r into \n. It also - records the types of newlines encountered. When used with - translate=False, it ensures that the newline sequence is returned in - one piece. 
- """ - def __init__(self, decoder, translate, errors='strict'): - codecs.IncrementalDecoder.__init__(self, errors=errors) - self.translate = translate - self.decoder = decoder - self.seennl = 0 - self.pendingcr = False - - def decode(self, input, final=False): - # decode input (with the eventual \r from a previous pass) - if self.decoder is None: - output = input - else: - output = self.decoder.decode(input, final=final) - if self.pendingcr and (output or final): - output = "\r" + output - self.pendingcr = False - - # retain last \r even when not translating data: - # then readline() is sure to get \r\n in one pass - if output.endswith("\r") and not final: - output = output[:-1] - self.pendingcr = True - - # Record which newlines are read - crlf = output.count('\r\n') - cr = output.count('\r') - crlf - lf = output.count('\n') - crlf - self.seennl |= (lf and self._LF) | (cr and self._CR) \ - | (crlf and self._CRLF) - - if self.translate: - if crlf: - output = output.replace("\r\n", "\n") - if cr: - output = output.replace("\r", "\n") - - return output - - def getstate(self): - if self.decoder is None: - buf = b"" - flag = 0 - else: - buf, flag = self.decoder.getstate() - flag <<= 1 - if self.pendingcr: - flag |= 1 - return buf, flag - - def setstate(self, state): - buf, flag = state - self.pendingcr = bool(flag & 1) - if self.decoder is not None: - self.decoder.setstate((buf, flag >> 1)) - - def reset(self): - self.seennl = 0 - self.pendingcr = False - if self.decoder is not None: - self.decoder.reset() - - _LF = 1 - _CR = 2 - _CRLF = 4 - - @property - def newlines(self): - return (None, - "\n", - "\r", - ("\r", "\n"), - "\r\n", - ("\n", "\r\n"), - ("\r", "\r\n"), - ("\r", "\n", "\r\n") - )[self.seennl] - - -class TextIOWrapper(TextIOBase): - - r"""Character and line based layer over a BufferedIOBase object, buffer. - - encoding gives the name of the encoding that the stream will be - decoded or encoded with. It defaults to locale.getpreferredencoding. 
- - errors determines the strictness of encoding and decoding (see the - codecs.register) and defaults to "strict". - - newline can be None, '', '\n', '\r', or '\r\n'. It controls the - handling of line endings. If it is None, universal newlines is - enabled. With this enabled, on input, the lines endings '\n', '\r', - or '\r\n' are translated to '\n' before being returned to the - caller. Conversely, on output, '\n' is translated to the system - default line separator, os.linesep. If newline is any other of its - legal values, that newline becomes the newline when the file is read - and it is returned untranslated. On output, '\n' is converted to the - newline. - - If line_buffering is True, a call to flush is implied when a call to - write contains a newline character. - """ - - _CHUNK_SIZE = 2048 - - def __init__(self, buffer, encoding=None, errors=None, newline=None, - line_buffering=False): - if newline is not None and not isinstance(newline, basestring): - raise TypeError("illegal newline type: %r" % (type(newline),)) - if newline not in (None, "", "\n", "\r", "\r\n"): - raise ValueError("illegal newline value: %r" % (newline,)) - if encoding is None: - try: - import locale - except ImportError: - # Importing locale may fail if Python is being built - encoding = "ascii" - else: - encoding = locale.getpreferredencoding() - - if not isinstance(encoding, basestring): - raise ValueError("invalid encoding: %r" % encoding) - - if sys.py3kwarning and not codecs.lookup(encoding)._is_text_encoding: - msg = ("%r is not a text encoding; " - "use codecs.open() to handle arbitrary codecs") - warnings.warnpy3k(msg % encoding, stacklevel=2) - - if errors is None: - errors = "strict" - else: - if not isinstance(errors, basestring): - raise ValueError("invalid errors: %r" % errors) - - self._buffer = buffer - self._line_buffering = line_buffering - self._encoding = encoding - self._errors = errors - self._readuniversal = not newline - self._readtranslate = newline is None - 
self._readnl = newline - self._writetranslate = newline != '' - self._writenl = newline or os.linesep - self._encoder = None - self._decoder = None - self._decoded_chars = '' # buffer for text returned from decoder - self._decoded_chars_used = 0 # offset into _decoded_chars for read() - self._snapshot = None # info for reconstructing decoder state - self._seekable = self._telling = self.buffer.seekable() - - if self._seekable and self.writable(): - position = self.buffer.tell() - if position != 0: - try: - self._get_encoder().setstate(0) - except LookupError: - # Sometimes the encoder doesn't exist - pass - - # self._snapshot is either None, or a tuple (dec_flags, next_input) - # where dec_flags is the second (integer) item of the decoder state - # and next_input is the chunk of input bytes that comes next after the - # snapshot point. We use this to reconstruct decoder states in tell(). - - # Naming convention: - # - "bytes_..." for integer variables that count input bytes - # - "chars_..." 
for integer variables that count decoded characters - - def __repr__(self): - try: - name = self.name - except Exception: - return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding) - else: - return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format( - name, self.encoding) - - @property - def encoding(self): - return self._encoding - - @property - def errors(self): - return self._errors - - @property - def line_buffering(self): - return self._line_buffering - - @property - def buffer(self): - return self._buffer - - def seekable(self): - if self.closed: - raise ValueError("I/O operation on closed file.") - return self._seekable - - def readable(self): - return self.buffer.readable() - - def writable(self): - return self.buffer.writable() - - def flush(self): - self.buffer.flush() - self._telling = self._seekable - - def close(self): - if self.buffer is not None and not self.closed: - try: - self.flush() - finally: - self.buffer.close() - - @property - def closed(self): - return self.buffer.closed - - @property - def name(self): - return self.buffer.name - - def fileno(self): - return self.buffer.fileno() - - def isatty(self): - return self.buffer.isatty() - - def write(self, s): - if self.closed: - raise ValueError("write to closed file") - if not isinstance(s, unicode): - raise TypeError("can't write %s to text stream" % - s.__class__.__name__) - length = len(s) - haslf = (self._writetranslate or self._line_buffering) and "\n" in s - if haslf and self._writetranslate and self._writenl != "\n": - s = s.replace("\n", self._writenl) - encoder = self._encoder or self._get_encoder() - # XXX What if we were just reading? 
- b = encoder.encode(s) - self.buffer.write(b) - if self._line_buffering and (haslf or "\r" in s): - self.flush() - self._snapshot = None - if self._decoder: - self._decoder.reset() - return length - - def _get_encoder(self): - make_encoder = codecs.getincrementalencoder(self._encoding) - self._encoder = make_encoder(self._errors) - return self._encoder - - def _get_decoder(self): - make_decoder = codecs.getincrementaldecoder(self._encoding) - decoder = make_decoder(self._errors) - if self._readuniversal: - decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) - self._decoder = decoder - return decoder - - # The following three methods implement an ADT for _decoded_chars. - # Text returned from the decoder is buffered here until the client - # requests it by calling our read() or readline() method. - def _set_decoded_chars(self, chars): - """Set the _decoded_chars buffer.""" - self._decoded_chars = chars - self._decoded_chars_used = 0 - - def _get_decoded_chars(self, n=None): - """Advance into the _decoded_chars buffer.""" - offset = self._decoded_chars_used - if n is None: - chars = self._decoded_chars[offset:] - else: - chars = self._decoded_chars[offset:offset + n] - self._decoded_chars_used += len(chars) - return chars - - def _rewind_decoded_chars(self, n): - """Rewind the _decoded_chars buffer.""" - if self._decoded_chars_used < n: - raise AssertionError("rewind decoded_chars out of bounds") - self._decoded_chars_used -= n - - def _read_chunk(self): - """ - Read and decode the next chunk of data from the BufferedReader. - """ - - # The return value is True unless EOF was reached. The decoded - # string is placed in self._decoded_chars (replacing its previous - # value). The entire input chunk is sent to the decoder, though - # some of it may remain buffered in the decoder, yet to be - # converted. 
- - if self._decoder is None: - raise ValueError("no decoder") - - if self._telling: - # To prepare for tell(), we need to snapshot a point in the - # file where the decoder's input buffer is empty. - - dec_buffer, dec_flags = self._decoder.getstate() - # Given this, we know there was a valid snapshot point - # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). - - # Read a chunk, decode it, and put the result in self._decoded_chars. - input_chunk = self.buffer.read1(self._CHUNK_SIZE) - eof = not input_chunk - self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) - - if self._telling: - # At the snapshot point, len(dec_buffer) bytes before the read, - # the next input to be decoded is dec_buffer + input_chunk. - self._snapshot = (dec_flags, dec_buffer + input_chunk) - - return not eof - - def _pack_cookie(self, position, dec_flags=0, - bytes_to_feed=0, need_eof=0, chars_to_skip=0): - # The meaning of a tell() cookie is: seek to position, set the - # decoder flags to dec_flags, read bytes_to_feed bytes, feed them - # into the decoder with need_eof as the EOF flag, then skip - # chars_to_skip characters of the decoded result. For most simple - # decoders, tell() will often just give a byte offset in the file. - return (position | (dec_flags<<64) | (bytes_to_feed<<128) | - (chars_to_skip<<192) | bool(need_eof)<<256) - - def _unpack_cookie(self, bigint): - rest, position = divmod(bigint, 1<<64) - rest, dec_flags = divmod(rest, 1<<64) - rest, bytes_to_feed = divmod(rest, 1<<64) - need_eof, chars_to_skip = divmod(rest, 1<<64) - return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip - - def tell(self): - if not self._seekable: - raise IOError("underlying stream is not seekable") - if not self._telling: - raise IOError("telling position disabled by next() call") - self.flush() - position = self.buffer.tell() - decoder = self._decoder - if decoder is None or self._snapshot is None: - if self._decoded_chars: - # This should never happen. 
- raise AssertionError("pending decoded text") - return position - - # Skip backward to the snapshot point (see _read_chunk). - dec_flags, next_input = self._snapshot - position -= len(next_input) - - # How many decoded characters have been used up since the snapshot? - chars_to_skip = self._decoded_chars_used - if chars_to_skip == 0: - # We haven't moved from the snapshot point. - return self._pack_cookie(position, dec_flags) - - # Starting from the snapshot position, we will walk the decoder - # forward until it gives us enough decoded characters. - saved_state = decoder.getstate() - try: - # Note our initial start point. - decoder.setstate((b'', dec_flags)) - start_pos = position - start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 - need_eof = 0 - - # Feed the decoder one byte at a time. As we go, note the - # nearest "safe start point" before the current location - # (a point where the decoder has nothing buffered, so seek() - # can safely start from there and advance to this location). - for next_byte in next_input: - bytes_fed += 1 - chars_decoded += len(decoder.decode(next_byte)) - dec_buffer, dec_flags = decoder.getstate() - if not dec_buffer and chars_decoded <= chars_to_skip: - # Decoder buffer is empty, so this is a safe start point. - start_pos += bytes_fed - chars_to_skip -= chars_decoded - start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 - if chars_decoded >= chars_to_skip: - break - else: - # We didn't get enough decoded data; signal EOF to get more. - chars_decoded += len(decoder.decode(b'', final=True)) - need_eof = 1 - if chars_decoded < chars_to_skip: - raise IOError("can't reconstruct logical file position") - - # The returned cookie corresponds to the last safe start point. 
- return self._pack_cookie( - start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) - finally: - decoder.setstate(saved_state) - - def truncate(self, pos=None): - self.flush() - if pos is None: - pos = self.tell() - return self.buffer.truncate(pos) - - def detach(self): - if self.buffer is None: - raise ValueError("buffer is already detached") - self.flush() - buffer = self._buffer - self._buffer = None - return buffer - - def seek(self, cookie, whence=0): - if self.closed: - raise ValueError("tell on closed file") - if not self._seekable: - raise IOError("underlying stream is not seekable") - if whence == 1: # seek relative to current position - if cookie != 0: - raise IOError("can't do nonzero cur-relative seeks") - # Seeking to the current position should attempt to - # sync the underlying buffer with the current position. - whence = 0 - cookie = self.tell() - if whence == 2: # seek relative to end of file - if cookie != 0: - raise IOError("can't do nonzero end-relative seeks") - self.flush() - position = self.buffer.seek(0, 2) - self._set_decoded_chars('') - self._snapshot = None - if self._decoder: - self._decoder.reset() - return position - if whence != 0: - raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % - (whence,)) - if cookie < 0: - raise ValueError("negative seek position %r" % (cookie,)) - self.flush() - - # The strategy of seek() is to go back to the safe start point - # and replay the effect of read(chars_to_skip) from there. - start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ - self._unpack_cookie(cookie) - - # Seek back to the safe start point. - self.buffer.seek(start_pos) - self._set_decoded_chars('') - self._snapshot = None - - # Restore the decoder to its state from the safe start point. 
- if cookie == 0 and self._decoder: - self._decoder.reset() - elif self._decoder or dec_flags or chars_to_skip: - self._decoder = self._decoder or self._get_decoder() - self._decoder.setstate((b'', dec_flags)) - self._snapshot = (dec_flags, b'') - - if chars_to_skip: - # Just like _read_chunk, feed the decoder and save a snapshot. - input_chunk = self.buffer.read(bytes_to_feed) - self._set_decoded_chars( - self._decoder.decode(input_chunk, need_eof)) - self._snapshot = (dec_flags, input_chunk) - - # Skip chars_to_skip of the decoded characters. - if len(self._decoded_chars) < chars_to_skip: - raise IOError("can't restore logical file position") - self._decoded_chars_used = chars_to_skip - - # Finally, reset the encoder (merely useful for proper BOM handling) - try: - encoder = self._encoder or self._get_encoder() - except LookupError: - # Sometimes the encoder doesn't exist - pass - else: - if cookie != 0: - encoder.setstate(0) - else: - encoder.reset() - return cookie - - def read(self, n=None): - self._checkReadable() - if n is None: - n = -1 - decoder = self._decoder or self._get_decoder() - try: - n.__index__ - except AttributeError: - raise TypeError("an integer is required") - if n < 0: - # Read everything. - result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) - self._set_decoded_chars('') - self._snapshot = None - return result - else: - # Keep reading chunks until we have n characters to return. 
- eof = False - result = self._get_decoded_chars(n) - while len(result) < n and not eof: - eof = not self._read_chunk() - result += self._get_decoded_chars(n - len(result)) - return result - - def next(self): - self._telling = False - line = self.readline() - if not line: - self._snapshot = None - self._telling = self._seekable - raise StopIteration - return line - - def readline(self, limit=None): - if self.closed: - raise ValueError("read from closed file") - if limit is None: - limit = -1 - elif not isinstance(limit, (int, long)): - raise TypeError("limit must be an integer") - - # Grab all the decoded text (we will rewind any extra bits later). - line = self._get_decoded_chars() - - start = 0 - # Make the decoder if it doesn't already exist. - if not self._decoder: - self._get_decoder() - - pos = endpos = None - while True: - if self._readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start) - if pos >= 0: - endpos = pos + 1 - break - else: - start = len(line) - - elif self._readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - - # In C we'd look for these in parallel of course. 
- nlpos = line.find("\n", start) - crpos = line.find("\r", start) - if crpos == -1: - if nlpos == -1: - # Nothing found - start = len(line) - else: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == -1: - # Found lone \r - endpos = crpos + 1 - break - elif nlpos < crpos: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == crpos + 1: - # Found \r\n - endpos = crpos + 2 - break - else: - # Found \r - endpos = crpos + 1 - break - else: - # non-universal - pos = line.find(self._readnl) - if pos >= 0: - endpos = pos + len(self._readnl) - break - - if limit >= 0 and len(line) >= limit: - endpos = limit # reached length limit - break - - # No line ending seen yet - get more data' - while self._read_chunk(): - if self._decoded_chars: - break - if self._decoded_chars: - line += self._get_decoded_chars() - else: - # end of file - self._set_decoded_chars('') - self._snapshot = None - return line - - if limit >= 0 and endpos > limit: - endpos = limit # don't exceed limit - - # Rewind _decoded_chars to just after the line ending we found. - self._rewind_decoded_chars(len(line) - endpos) - return line[:endpos] - - @property - def newlines(self): - return self._decoder.newlines if self._decoder else None - - -class StringIO(TextIOWrapper): - """Text I/O implementation using an in-memory buffer. - - The initial_value argument sets the value of object. The newline - argument is like the one of TextIOWrapper's constructor. - """ - - def __init__(self, initial_value="", newline="\n"): - super(StringIO, self).__init__(BytesIO(), - encoding="utf-8", - errors="strict", - newline=newline) - # Issue #5645: make universal newlines semantics the same as in the - # C version, even under Windows. 
- if newline is None: - self._writetranslate = False - if initial_value: - if not isinstance(initial_value, unicode): - initial_value = unicode(initial_value) - self.write(initial_value) - self.seek(0) - - def getvalue(self): - self.flush() - decoder = self._decoder or self._get_decoder() - old_state = decoder.getstate() - decoder.reset() - try: - return decoder.decode(self.buffer.getvalue(), final=True) - finally: - decoder.setstate(old_state) - - def __repr__(self): - # TextIOWrapper tells the encoding in its repr. In StringIO, - # that's an implementation detail. - return object.__repr__(self) - - @property - def errors(self): - return None - - @property - def encoding(self): - return None - - def detach(self): - # This doesn't make sense on StringIO. - self._unsupported("detach") diff --git a/python/Lib/_strptime.py b/python/Lib/_strptime.py deleted file mode 100755 index feac05a001..0000000000 --- a/python/Lib/_strptime.py +++ /dev/null @@ -1,478 +0,0 @@ -"""Strptime-related classes and functions. - -CLASSES: - LocaleTime -- Discovers and stores locale-specific time information - TimeRE -- Creates regexes for pattern matching a string of text containing - time information - -FUNCTIONS: - _getlang -- Figure out what language is being used for the locale - strptime -- Calculates the time struct represented by the passed-in string - -""" -import time -import locale -import calendar -from re import compile as re_compile -from re import IGNORECASE -from re import escape as re_escape -from datetime import date as datetime_date -try: - from thread import allocate_lock as _thread_allocate_lock -except: - from dummy_thread import allocate_lock as _thread_allocate_lock - -__all__ = [] - -def _getlang(): - # Figure out what the current language is set to. - return locale.getlocale(locale.LC_TIME) - -class LocaleTime(object): - """Stores and handles locale-specific information related to time. 
- - ATTRIBUTES: - f_weekday -- full weekday names (7-item list) - a_weekday -- abbreviated weekday names (7-item list) - f_month -- full month names (13-item list; dummy value in [0], which - is added by code) - a_month -- abbreviated month names (13-item list, dummy value in - [0], which is added by code) - am_pm -- AM/PM representation (2-item list) - LC_date_time -- format string for date/time representation (string) - LC_date -- format string for date representation (string) - LC_time -- format string for time representation (string) - timezone -- daylight- and non-daylight-savings timezone representation - (2-item list of sets) - lang -- Language used by instance (2-item tuple) - """ - - def __init__(self): - """Set all attributes. - - Order of methods called matters for dependency reasons. - - The locale language is set at the offset and then checked again before - exiting. This is to make sure that the attributes were not set with a - mix of information from more than one locale. This would most likely - happen when using threads where one thread calls a locale-dependent - function while another thread changes the locale while the function in - the other thread is still running. Proper coding would call for - locks to prevent changing the locale while locale-dependent code is - running. The check here is done in case someone does not think about - doing this. - - Only other possible issue is if someone changed the timezone and did - not call tz.tzset . That is an issue for the programmer, though, - since changing the timezone is worthless without that call. 
- - """ - self.lang = _getlang() - self.__calc_weekday() - self.__calc_month() - self.__calc_am_pm() - self.__calc_timezone() - self.__calc_date_time() - if _getlang() != self.lang: - raise ValueError("locale changed during initialization") - if time.tzname != self.tzname or time.daylight != self.daylight: - raise ValueError("timezone changed during initialization") - - def __pad(self, seq, front): - # Add '' to seq to either the front (is True), else the back. - seq = list(seq) - if front: - seq.insert(0, '') - else: - seq.append('') - return seq - - def __calc_weekday(self): - # Set self.a_weekday and self.f_weekday using the calendar - # module. - a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] - f_weekday = [calendar.day_name[i].lower() for i in range(7)] - self.a_weekday = a_weekday - self.f_weekday = f_weekday - - def __calc_month(self): - # Set self.f_month and self.a_month using the calendar module. - a_month = [calendar.month_abbr[i].lower() for i in range(13)] - f_month = [calendar.month_name[i].lower() for i in range(13)] - self.a_month = a_month - self.f_month = f_month - - def __calc_am_pm(self): - # Set self.am_pm by using time.strftime(). - - # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that - # magical; just happened to have used it everywhere else where a - # static date was needed. - am_pm = [] - for hour in (01,22): - time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) - self.am_pm = am_pm - - def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). - - # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of - # overloaded numbers is minimized. The order in which searches for - # values within the format string is very important; it eliminates - # possible ambiguity for what something represents. 
- time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. - ('2', '%w'), ('10', '%I')] - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): - current_format = date_time[offset] - for old, new in replacement_pairs: - # Must deal with possible lack of locale info - # manifesting itself as the empty string (e.g., Swedish's - # lack of AM/PM info) or a platform returning a tuple of empty - # strings (e.g., MacOS 9 having timezone as ('','')). - if old: - current_format = current_format.replace(old, new) - # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since - # 2005-01-03 occurs before the first Monday of the year. Otherwise - # %U is used. - time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0)) - if '00' in time.strftime(directive, time_tuple): - U_W = '%W' - else: - U_W = '%U' - date_time[offset] = current_format.replace('11', U_W) - self.LC_date_time = date_time[0] - self.LC_date = date_time[1] - self.LC_time = date_time[2] - - def __calc_timezone(self): - # Set self.timezone by using time.tzname. - # Do not worry about possibility of time.tzname[0] == time.tzname[1] - # and time.daylight; handle that in strptime. 
- try: - time.tzset() - except AttributeError: - pass - self.tzname = time.tzname - self.daylight = time.daylight - no_saving = frozenset(["utc", "gmt", self.tzname[0].lower()]) - if self.daylight: - has_saving = frozenset([self.tzname[1].lower()]) - else: - has_saving = frozenset() - self.timezone = (no_saving, has_saving) - - -class TimeRE(dict): - """Handle conversion from format directives to regexes.""" - - def __init__(self, locale_time=None): - """Create keys/values. - - Order of execution is important for dependency reasons. - - """ - if locale_time: - self.locale_time = locale_time - else: - self.locale_time = LocaleTime() - base = super(TimeRE, self) - base.__init__({ - # The " \d" part of the regex is to make %c from ANSI C work - 'd': r"(?P3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", - 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", - 'I': r"(?P1[0-2]|0[1-9]|[1-9])", - 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", - 'm': r"(?P1[0-2]|0[1-9]|[1-9])", - 'M': r"(?P[0-5]\d|\d)", - 'S': r"(?P6[0-1]|[0-5]\d|\d)", - 'U': r"(?P5[0-3]|[0-4]\d|\d)", - 'w': r"(?P[0-6])", - # W is set below by using 'U' - 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than - # 4 digits? 
- 'Y': r"(?P\d\d\d\d)", - 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), - 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), - 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), - 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone - for tz in tz_names), - 'Z'), - '%': '%'}) - base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) - base.__setitem__('X', self.pattern(self.locale_time.LC_time)) - - def __seqToRE(self, to_convert, directive): - """Convert a list to a regex string for matching a directive. - - Want possible matching values to be from longest to shortest. This - prevents the possibility of a match occurring for a value that also - a substring of a larger value that should have matched (e.g., 'abc' - matching when 'abcdef' should have been the match). - - """ - to_convert = sorted(to_convert, key=len, reverse=True) - for value in to_convert: - if value != '': - break - else: - return '' - regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex - - def pattern(self, format): - """Return regex pattern for the format string. - - Need to make sure that any characters that might be interpreted as - regex syntax are escaped. - - """ - processed_format = '' - # The sub() call escapes all characters that might be misconstrued - # as regex syntax. Cannot use re.escape since we have to deal with - # format directives (%m, etc.). 
- regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile('\s+') - format = whitespace_replacement.sub('\s+', format) - while '%' in format: - directive_index = format.index('%')+1 - processed_format = "%s%s%s" % (processed_format, - format[:directive_index-1], - self[format[directive_index]]) - format = format[directive_index+1:] - return "%s%s" % (processed_format, format) - - def compile(self, format): - """Return a compiled re object for the format string.""" - return re_compile(self.pattern(format), IGNORECASE) - -_cache_lock = _thread_allocate_lock() -# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock -# first! -_TimeRE_cache = TimeRE() -_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache -_regex_cache = {} - -def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon): - """Calculate the Julian day based on the year, week of the year, and day of - the week, with week_start_day representing whether the week of the year - assumes the week starts on Sunday or Monday (6 or 0).""" - first_weekday = datetime_date(year, 1, 1).weekday() - # If we are dealing with the %U directive (week starts on Sunday), it's - # easier to just shift the view to Sunday being the first day of the - # week. - if not week_starts_Mon: - first_weekday = (first_weekday + 1) % 7 - day_of_week = (day_of_week + 1) % 7 - # Need to watch out for a week 0 (when the first day of the year is not - # the same as that specified by %U or %W). 
- week_0_length = (7 - first_weekday) % 7 - if week_of_year == 0: - return 1 + day_of_week - first_weekday - else: - days_to_week = week_0_length + (7 * (week_of_year - 1)) - return 1 + days_to_week + day_of_week - - -def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): - """Return a time struct based on the input string and the format string.""" - global _TimeRE_cache, _regex_cache - with _cache_lock: - locale_time = _TimeRE_cache.locale_time - if (_getlang() != locale_time.lang or - time.tzname != locale_time.tzname or - time.daylight != locale_time.daylight): - _TimeRE_cache = TimeRE() - _regex_cache.clear() - locale_time = _TimeRE_cache.locale_time - if len(_regex_cache) > _CACHE_MAX_SIZE: - _regex_cache.clear() - format_regex = _regex_cache.get(format) - if not format_regex: - try: - format_regex = _TimeRE_cache.compile(format) - # KeyError raised when a bad format is found; can be specified as - # \\, in which case it was a stray % but with a space after it - except KeyError, err: - bad_directive = err.args[0] - if bad_directive == "\\": - bad_directive = "%" - del err - raise ValueError("'%s' is a bad directive in format '%s'" % - (bad_directive, format)) - # IndexError only occurs when the format string is "%" - except IndexError: - raise ValueError("stray %% in format '%s'" % format) - _regex_cache[format] = format_regex - found = format_regex.match(data_string) - if not found: - raise ValueError("time data %r does not match format %r" % - (data_string, format)) - if len(data_string) != found.end(): - raise ValueError("unconverted data remains: %s" % - data_string[found.end():]) - - year = None - month = day = 1 - hour = minute = second = fraction = 0 - tz = -1 - # Default to -1 to signify that values not known; not critical to have, - # though - week_of_year = -1 - week_of_year_start = -1 - # weekday and julian defaulted to None so as to signal need to calculate - # values - weekday = julian = None - found_dict = found.groupdict() - for group_key in 
found_dict.iterkeys(): - # Directives not explicitly handled below: - # c, x, X - # handled by making out of other directives - # U, W - # worthless without day of the week - if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 - else: - year += 1900 - elif group_key == 'Y': - year = int(found_dict['Y']) - elif group_key == 'm': - month = int(found_dict['m']) - elif group_key == 'B': - month = locale_time.f_month.index(found_dict['B'].lower()) - elif group_key == 'b': - month = locale_time.a_month.index(found_dict['b'].lower()) - elif group_key == 'd': - day = int(found_dict['d']) - elif group_key == 'H': - hour = int(found_dict['H']) - elif group_key == 'I': - hour = int(found_dict['I']) - ampm = found_dict.get('p', '').lower() - # If there was no AM/PM indicator, we'll treat this like AM - if ampm in ('', locale_time.am_pm[0]): - # We're in AM so the hour is correct unless we're - # looking at 12 midnight. - # 12 midnight == 12 AM == hour 0 - if hour == 12: - hour = 0 - elif ampm == locale_time.am_pm[1]: - # We're in PM so we need to add 12 to the hour unless - # we're looking at 12 noon. - # 12 noon == 12 PM == hour 12 - if hour != 12: - hour += 12 - elif group_key == 'M': - minute = int(found_dict['M']) - elif group_key == 'S': - second = int(found_dict['S']) - elif group_key == 'f': - s = found_dict['f'] - # Pad to always return microseconds. 
- s += "0" * (6 - len(s)) - fraction = int(s) - elif group_key == 'A': - weekday = locale_time.f_weekday.index(found_dict['A'].lower()) - elif group_key == 'a': - weekday = locale_time.a_weekday.index(found_dict['a'].lower()) - elif group_key == 'w': - weekday = int(found_dict['w']) - if weekday == 0: - weekday = 6 - else: - weekday -= 1 - elif group_key == 'j': - julian = int(found_dict['j']) - elif group_key in ('U', 'W'): - week_of_year = int(found_dict[group_key]) - if group_key == 'U': - # U starts week on Sunday. - week_of_year_start = 6 - else: - # W starts week on Monday. - week_of_year_start = 0 - elif group_key == 'Z': - # Since -1 is default value only need to worry about setting tz if - # it can be something other than -1. - found_zone = found_dict['Z'].lower() - for value, tz_values in enumerate(locale_time.timezone): - if found_zone in tz_values: - # Deal with bad locale setup where timezone names are the - # same and yet time.daylight is true; too ambiguous to - # be able to tell what timezone has daylight savings - if (time.tzname[0] == time.tzname[1] and - time.daylight and found_zone not in ("utc", "gmt")): - break - else: - tz = value - break - leap_year_fix = False - if year is None and month == 2 and day == 29: - year = 1904 # 1904 is first leap year of 20th century - leap_year_fix = True - elif year is None: - year = 1900 - # If we know the week of the year and what day of that week, we can figure - # out the Julian day of the year. - if julian is None and week_of_year != -1 and weekday is not None: - week_starts_Mon = True if week_of_year_start == 0 else False - julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, - week_starts_Mon) - if julian <= 0: - year -= 1 - yday = 366 if calendar.isleap(year) else 365 - julian += yday - # Cannot pre-calculate datetime_date() since can change in Julian - # calculation and thus could have different value for the day of the week - # calculation. 
- if julian is None: - # Need to add 1 to result since first day of the year is 1, not 0. - julian = datetime_date(year, month, day).toordinal() - \ - datetime_date(year, 1, 1).toordinal() + 1 - else: # Assume that if they bothered to include Julian day it will - # be accurate. - datetime_result = datetime_date.fromordinal((julian - 1) + datetime_date(year, 1, 1).toordinal()) - year = datetime_result.year - month = datetime_result.month - day = datetime_result.day - if weekday is None: - weekday = datetime_date(year, month, day).weekday() - if leap_year_fix: - # the caller didn't supply a year but asked for Feb 29th. We couldn't - # use the default of 1900 for computations. We set it back to ensure - # that February 29th is smaller than March 1st. - year = 1900 - - return (time.struct_time((year, month, day, - hour, minute, second, - weekday, julian, tz)), fraction) - -def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): - return _strptime(data_string, format)[0] diff --git a/python/Lib/_threading_local.py b/python/Lib/_threading_local.py deleted file mode 100755 index 09a3515bdb..0000000000 --- a/python/Lib/_threading_local.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Thread-local objects. - -(Note that this module provides a Python version of the threading.local - class. Depending on the version of Python you're using, there may be a - faster one available. You should always import the `local` class from - `threading`.) - -Thread-local objects support the management of thread-local data. -If you have data that you want to be local to a thread, simply create -a thread-local object and use its attributes: - - >>> mydata = local() - >>> mydata.number = 42 - >>> mydata.number - 42 - -You can also access the local-object's dictionary: - - >>> mydata.__dict__ - {'number': 42} - >>> mydata.__dict__.setdefault('widgets', []) - [] - >>> mydata.widgets - [] - -What's important about thread-local objects is that their data are -local to a thread. 
If we access the data in a different thread: - - >>> log = [] - >>> def f(): - ... items = mydata.__dict__.items() - ... items.sort() - ... log.append(items) - ... mydata.number = 11 - ... log.append(mydata.number) - - >>> import threading - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[], 11] - -we get different data. Furthermore, changes made in the other thread -don't affect data seen in this thread: - - >>> mydata.number - 42 - -Of course, values you get from a local object, including a __dict__ -attribute, are for whatever thread was current at the time the -attribute was read. For that reason, you generally don't want to save -these values across threads, as they apply only to the thread they -came from. - -You can create custom local objects by subclassing the local class: - - >>> class MyLocal(local): - ... number = 2 - ... initialized = False - ... def __init__(self, **kw): - ... if self.initialized: - ... raise SystemError('__init__ called too many times') - ... self.initialized = True - ... self.__dict__.update(kw) - ... def squared(self): - ... return self.number ** 2 - -This can be useful to support default values, methods and -initialization. Note that if you define an __init__ method, it will be -called each time the local object is used in a separate thread. This -is necessary to initialize each thread's dictionary. 
- -Now if we create a local object: - - >>> mydata = MyLocal(color='red') - -Now we have a default number: - - >>> mydata.number - 2 - -an initial color: - - >>> mydata.color - 'red' - >>> del mydata.color - -And a method that operates on the data: - - >>> mydata.squared() - 4 - -As before, we can access the data in a separate thread: - - >>> log = [] - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[('color', 'red'), ('initialized', True)], 11] - -without affecting this thread's data: - - >>> mydata.number - 2 - >>> mydata.color - Traceback (most recent call last): - ... - AttributeError: 'MyLocal' object has no attribute 'color' - -Note that subclasses can define slots, but they are not thread -local. They are shared across threads: - - >>> class MyLocal(local): - ... __slots__ = 'number' - - >>> mydata = MyLocal() - >>> mydata.number = 42 - >>> mydata.color = 'red' - -So, the separate thread: - - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - -affects what we see: - - >>> mydata.number - 11 - ->>> del mydata -""" - -__all__ = ["local"] - -# We need to use objects from the threading module, but the threading -# module may also want to use our `local` class, if support for locals -# isn't compiled in to the `thread` module. This creates potential problems -# with circular imports. For that reason, we don't import `threading` -# until the bottom of this file (a hack sufficient to worm around the -# potential problems). Note that almost all platforms do have support for -# locals in the `thread` module, and there is no circular import problem -# then, so problems introduced by fiddling the order of imports here won't -# manifest on most boxes. - -class _localbase(object): - __slots__ = '_local__key', '_local__args', '_local__lock' - - def __new__(cls, *args, **kw): - self = object.__new__(cls) - key = '_local__key', 'thread.local.' 
+ str(id(self)) - object.__setattr__(self, '_local__key', key) - object.__setattr__(self, '_local__args', (args, kw)) - object.__setattr__(self, '_local__lock', RLock()) - - if (args or kw) and (cls.__init__ is object.__init__): - raise TypeError("Initialization arguments are not supported") - - # We need to create the thread dict in anticipation of - # __init__ being called, to make sure we don't call it - # again ourselves. - dict = object.__getattribute__(self, '__dict__') - current_thread().__dict__[key] = dict - - return self - -def _patch(self): - key = object.__getattribute__(self, '_local__key') - d = current_thread().__dict__.get(key) - if d is None: - d = {} - current_thread().__dict__[key] = d - object.__setattr__(self, '__dict__', d) - - # we have a new instance dict, so call out __init__ if we have - # one - cls = type(self) - if cls.__init__ is not object.__init__: - args, kw = object.__getattribute__(self, '_local__args') - cls.__init__(self, *args, **kw) - else: - object.__setattr__(self, '__dict__', d) - -class local(_localbase): - - def __getattribute__(self, name): - lock = object.__getattribute__(self, '_local__lock') - lock.acquire() - try: - _patch(self) - return object.__getattribute__(self, name) - finally: - lock.release() - - def __setattr__(self, name, value): - if name == '__dict__': - raise AttributeError( - "%r object attribute '__dict__' is read-only" - % self.__class__.__name__) - lock = object.__getattribute__(self, '_local__lock') - lock.acquire() - try: - _patch(self) - return object.__setattr__(self, name, value) - finally: - lock.release() - - def __delattr__(self, name): - if name == '__dict__': - raise AttributeError( - "%r object attribute '__dict__' is read-only" - % self.__class__.__name__) - lock = object.__getattribute__(self, '_local__lock') - lock.acquire() - try: - _patch(self) - return object.__delattr__(self, name) - finally: - lock.release() - - def __del__(self): - import threading - - key = 
object.__getattribute__(self, '_local__key') - - try: - # We use the non-locking API since we might already hold the lock - # (__del__ can be called at any point by the cyclic GC). - threads = threading._enumerate() - except: - # If enumerating the current threads fails, as it seems to do - # during shutdown, we'll skip cleanup under the assumption - # that there is nothing to clean up. - return - - for thread in threads: - try: - __dict__ = thread.__dict__ - except AttributeError: - # Thread is dying, rest in peace. - continue - - if key in __dict__: - try: - del __dict__[key] - except KeyError: - pass # didn't have anything in this thread - -from threading import current_thread, RLock diff --git a/python/Lib/_weakrefset.py b/python/Lib/_weakrefset.py deleted file mode 100644 index 627959b624..0000000000 --- a/python/Lib/_weakrefset.py +++ /dev/null @@ -1,204 +0,0 @@ -# Access WeakSet through the weakref module. -# This code is separated-out because it is needed -# by abc.py to load everything else at startup. - -from _weakref import ref - -__all__ = ['WeakSet'] - - -class _IterationGuard(object): - # This context manager registers itself in the current iterators of the - # weak container, such as to delay all removals until the context manager - # exits. - # This technique should be relatively thread-safe (since sets are). 
- - def __init__(self, weakcontainer): - # Don't create cycles - self.weakcontainer = ref(weakcontainer) - - def __enter__(self): - w = self.weakcontainer() - if w is not None: - w._iterating.add(self) - return self - - def __exit__(self, e, t, b): - w = self.weakcontainer() - if w is not None: - s = w._iterating - s.remove(self) - if not s: - w._commit_removals() - - -class WeakSet(object): - def __init__(self, data=None): - self.data = set() - def _remove(item, selfref=ref(self)): - self = selfref() - if self is not None: - if self._iterating: - self._pending_removals.append(item) - else: - self.data.discard(item) - self._remove = _remove - # A list of keys to be removed - self._pending_removals = [] - self._iterating = set() - if data is not None: - self.update(data) - - def _commit_removals(self): - l = self._pending_removals - discard = self.data.discard - while l: - discard(l.pop()) - - def __iter__(self): - with _IterationGuard(self): - for itemref in self.data: - item = itemref() - if item is not None: - # Caveat: the iterator will keep a strong reference to - # `item` until it is resumed or closed. 
- yield item - - def __len__(self): - return len(self.data) - len(self._pending_removals) - - def __contains__(self, item): - try: - wr = ref(item) - except TypeError: - return False - return wr in self.data - - def __reduce__(self): - return (self.__class__, (list(self),), - getattr(self, '__dict__', None)) - - __hash__ = None - - def add(self, item): - if self._pending_removals: - self._commit_removals() - self.data.add(ref(item, self._remove)) - - def clear(self): - if self._pending_removals: - self._commit_removals() - self.data.clear() - - def copy(self): - return self.__class__(self) - - def pop(self): - if self._pending_removals: - self._commit_removals() - while True: - try: - itemref = self.data.pop() - except KeyError: - raise KeyError('pop from empty WeakSet') - item = itemref() - if item is not None: - return item - - def remove(self, item): - if self._pending_removals: - self._commit_removals() - self.data.remove(ref(item)) - - def discard(self, item): - if self._pending_removals: - self._commit_removals() - self.data.discard(ref(item)) - - def update(self, other): - if self._pending_removals: - self._commit_removals() - for element in other: - self.add(element) - - def __ior__(self, other): - self.update(other) - return self - - def difference(self, other): - newset = self.copy() - newset.difference_update(other) - return newset - __sub__ = difference - - def difference_update(self, other): - self.__isub__(other) - def __isub__(self, other): - if self._pending_removals: - self._commit_removals() - if self is other: - self.data.clear() - else: - self.data.difference_update(ref(item) for item in other) - return self - - def intersection(self, other): - return self.__class__(item for item in other if item in self) - __and__ = intersection - - def intersection_update(self, other): - self.__iand__(other) - def __iand__(self, other): - if self._pending_removals: - self._commit_removals() - self.data.intersection_update(ref(item) for item in other) - return 
self - - def issubset(self, other): - return self.data.issubset(ref(item) for item in other) - __le__ = issubset - - def __lt__(self, other): - return self.data < set(ref(item) for item in other) - - def issuperset(self, other): - return self.data.issuperset(ref(item) for item in other) - __ge__ = issuperset - - def __gt__(self, other): - return self.data > set(ref(item) for item in other) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return self.data == set(ref(item) for item in other) - - def __ne__(self, other): - opposite = self.__eq__(other) - if opposite is NotImplemented: - return NotImplemented - return not opposite - - def symmetric_difference(self, other): - newset = self.copy() - newset.symmetric_difference_update(other) - return newset - __xor__ = symmetric_difference - - def symmetric_difference_update(self, other): - self.__ixor__(other) - def __ixor__(self, other): - if self._pending_removals: - self._commit_removals() - if self is other: - self.data.clear() - else: - self.data.symmetric_difference_update(ref(item, self._remove) for item in other) - return self - - def union(self, other): - return self.__class__(e for s in (self, other) for e in s) - __or__ = union - - def isdisjoint(self, other): - return len(self.intersection(other)) == 0 diff --git a/python/Lib/abc.py b/python/Lib/abc.py deleted file mode 100755 index 02e48a1bb3..0000000000 --- a/python/Lib/abc.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Abstract Base Classes (ABCs) according to PEP 3119.""" - -import types - -from _weakrefset import WeakSet - -# Instance of old-style class -class _C: pass -_InstanceType = type(_C()) - - -def abstractmethod(funcobj): - """A decorator indicating abstract methods. - - Requires that the metaclass is ABCMeta or derived from it. 
A - class that has a metaclass derived from ABCMeta cannot be - instantiated unless all of its abstract methods are overridden. - The abstract methods can be called using any of the normal - 'super' call mechanisms. - - Usage: - - class C: - __metaclass__ = ABCMeta - @abstractmethod - def my_abstract_method(self, ...): - ... - """ - funcobj.__isabstractmethod__ = True - return funcobj - - -class abstractproperty(property): - """A decorator indicating abstract properties. - - Requires that the metaclass is ABCMeta or derived from it. A - class that has a metaclass derived from ABCMeta cannot be - instantiated unless all of its abstract properties are overridden. - The abstract properties can be called using any of the normal - 'super' call mechanisms. - - Usage: - - class C: - __metaclass__ = ABCMeta - @abstractproperty - def my_abstract_property(self): - ... - - This defines a read-only property; you can also define a read-write - abstract property using the 'long' form of property declaration: - - class C: - __metaclass__ = ABCMeta - def getx(self): ... - def setx(self, value): ... - x = abstractproperty(getx, setx) - """ - __isabstractmethod__ = True - - -class ABCMeta(type): - - """Metaclass for defining Abstract Base Classes (ABCs). - - Use this metaclass to create an ABC. An ABC can be subclassed - directly, and then acts as a mix-in class. You can also register - unrelated concrete classes (even built-in classes) and unrelated - ABCs as 'virtual subclasses' -- these and their descendants will - be considered subclasses of the registering ABC by the built-in - issubclass() function, but the registering ABC won't show up in - their MRO (Method Resolution Order) nor will method - implementations defined by the registering ABC be callable (not - even via super()). - - """ - - # A global counter that is incremented each time a class is - # registered as a virtual subclass of anything. It forces the - # negative cache to be cleared before its next use. 
- _abc_invalidation_counter = 0 - - def __new__(mcls, name, bases, namespace): - cls = super(ABCMeta, mcls).__new__(mcls, name, bases, namespace) - # Compute set of abstract method names - abstracts = set(name - for name, value in namespace.items() - if getattr(value, "__isabstractmethod__", False)) - for base in bases: - for name in getattr(base, "__abstractmethods__", set()): - value = getattr(cls, name, None) - if getattr(value, "__isabstractmethod__", False): - abstracts.add(name) - cls.__abstractmethods__ = frozenset(abstracts) - # Set up inheritance registry - cls._abc_registry = WeakSet() - cls._abc_cache = WeakSet() - cls._abc_negative_cache = WeakSet() - cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter - return cls - - def register(cls, subclass): - """Register a virtual subclass of an ABC.""" - if not isinstance(subclass, (type, types.ClassType)): - raise TypeError("Can only register classes") - if issubclass(subclass, cls): - return # Already a subclass - # Subtle: test for cycles *after* testing for "already a subclass"; - # this means we allow X.register(X) and interpret it as a no-op. - if issubclass(cls, subclass): - # This would create a cycle, which is bad for the algorithm below - raise RuntimeError("Refusing to create an inheritance cycle") - cls._abc_registry.add(subclass) - ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache - - def _dump_registry(cls, file=None): - """Debug helper to print the ABC registry.""" - print >> file, "Class: %s.%s" % (cls.__module__, cls.__name__) - print >> file, "Inv.counter: %s" % ABCMeta._abc_invalidation_counter - for name in sorted(cls.__dict__.keys()): - if name.startswith("_abc_"): - value = getattr(cls, name) - print >> file, "%s: %r" % (name, value) - - def __instancecheck__(cls, instance): - """Override for isinstance(instance, cls).""" - # Inline the cache checking when it's simple. 
- subclass = getattr(instance, '__class__', None) - if subclass is not None and subclass in cls._abc_cache: - return True - subtype = type(instance) - # Old-style instances - if subtype is _InstanceType: - subtype = subclass - if subtype is subclass or subclass is None: - if (cls._abc_negative_cache_version == - ABCMeta._abc_invalidation_counter and - subtype in cls._abc_negative_cache): - return False - # Fall back to the subclass check. - return cls.__subclasscheck__(subtype) - return (cls.__subclasscheck__(subclass) or - cls.__subclasscheck__(subtype)) - - def __subclasscheck__(cls, subclass): - """Override for issubclass(subclass, cls).""" - # Check cache - if subclass in cls._abc_cache: - return True - # Check negative cache; may have to invalidate - if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter: - # Invalidate the negative cache - cls._abc_negative_cache = WeakSet() - cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter - elif subclass in cls._abc_negative_cache: - return False - # Check the subclass hook - ok = cls.__subclasshook__(subclass) - if ok is not NotImplemented: - assert isinstance(ok, bool) - if ok: - cls._abc_cache.add(subclass) - else: - cls._abc_negative_cache.add(subclass) - return ok - # Check if it's a direct subclass - if cls in getattr(subclass, '__mro__', ()): - cls._abc_cache.add(subclass) - return True - # Check if it's a subclass of a registered class (recursive) - for rcls in cls._abc_registry: - if issubclass(subclass, rcls): - cls._abc_cache.add(subclass) - return True - # Check if it's a subclass of a subclass (recursive) - for scls in cls.__subclasses__(): - if issubclass(subclass, scls): - cls._abc_cache.add(subclass) - return True - # No dice; update negative cache - cls._abc_negative_cache.add(subclass) - return False diff --git a/python/Lib/aifc.py b/python/Lib/aifc.py deleted file mode 100755 index c9a021ee9d..0000000000 --- a/python/Lib/aifc.py +++ /dev/null @@ -1,989 +0,0 @@ -"""Stuff 
to parse AIFF-C and AIFF files. - -Unless explicitly stated otherwise, the description below is true -both for AIFF-C files and AIFF files. - -An AIFF-C file has the following structure. - - +-----------------+ - | FORM | - +-----------------+ - | | - +----+------------+ - | | AIFC | - | +------------+ - | | | - | | . | - | | . | - | | . | - +----+------------+ - -An AIFF file has the string "AIFF" instead of "AIFC". - -A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, -big endian order), followed by the data. The size field does not include -the size of the 8 byte header. - -The following chunk types are recognized. - - FVER - (AIFF-C only). - MARK - <# of markers> (2 bytes) - list of markers: - (2 bytes, must be > 0) - (4 bytes) - ("pstring") - COMM - <# of channels> (2 bytes) - <# of sound frames> (4 bytes) - (2 bytes) - (10 bytes, IEEE 80-bit extended - floating point) - in AIFF-C files only: - (4 bytes) - ("pstring") - SSND - (4 bytes, not used by this program) - (4 bytes, not used by this program) - - -A pstring consists of 1 byte length, a string of characters, and 0 or 1 -byte pad to make the total length even. - -Usage. - -Reading AIFF files: - f = aifc.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -In some types of audio files, if the setpos() method is not used, -the seek() method is not necessary. 
- -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' for AIFF files) - getcompname() -- returns human-readable version of - compression type ('not compressed' for AIFF files) - getparams() -- returns a tuple consisting of all of the - above in the above order - getmarkers() -- get the list of marks in the audio file or None - if there are no marks - getmark(id) -- get mark with the specified id (raises an error - if the mark does not exist) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell(), the position given to setpos() and -the position of marks are all compatible and have nothing to do with -the actual position in the file. -The close() method is called automatically when the class instance -is destroyed. - -Writing AIFF files: - f = aifc.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). 
- -This returns an instance of a class with the following public methods: - aiff() -- create an AIFF file (AIFF-C default) - aifc() -- create an AIFF-C file - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple) - -- set all parameters at once - setmark(id, pos, name) - -- add specified mark to the list of marks - tell() -- return current position in output file (useful - in combination with setmark()) - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes('') or -close() to patch up the sizes in the header. -Marks can be added anytime. If there are any marks, you must call -close() after all frames have been written. -The close() method is called automatically when the class instance -is destroyed. - -When a file is opened with the extension '.aiff', an AIFF file is -written, otherwise an AIFF-C file is written. This default can be -changed by calling aiff() or aifc() before the first writeframes or -writeframesraw. 
-""" - -import struct -import __builtin__ - -__all__ = ["Error","open","openfp"] - -class Error(Exception): - pass - -_AIFC_version = 0xA2805140L # Version 1 of AIFF-C - -def _read_long(file): - try: - return struct.unpack('>l', file.read(4))[0] - except struct.error: - raise EOFError - -def _read_ulong(file): - try: - return struct.unpack('>L', file.read(4))[0] - except struct.error: - raise EOFError - -def _read_short(file): - try: - return struct.unpack('>h', file.read(2))[0] - except struct.error: - raise EOFError - -def _read_ushort(file): - try: - return struct.unpack('>H', file.read(2))[0] - except struct.error: - raise EOFError - -def _read_string(file): - length = ord(file.read(1)) - if length == 0: - data = '' - else: - data = file.read(length) - if length & 1 == 0: - dummy = file.read(1) - return data - -_HUGE_VAL = 1.79769313486231e+308 # See - -def _read_float(f): # 10 bytes - expon = _read_short(f) # 2 bytes - sign = 1 - if expon < 0: - sign = -1 - expon = expon + 0x8000 - himant = _read_ulong(f) # 4 bytes - lomant = _read_ulong(f) # 4 bytes - if expon == himant == lomant == 0: - f = 0.0 - elif expon == 0x7FFF: - f = _HUGE_VAL - else: - expon = expon - 16383 - f = (himant * 0x100000000L + lomant) * pow(2.0, expon - 63) - return sign * f - -def _write_short(f, x): - f.write(struct.pack('>h', x)) - -def _write_ushort(f, x): - f.write(struct.pack('>H', x)) - -def _write_long(f, x): - f.write(struct.pack('>l', x)) - -def _write_ulong(f, x): - f.write(struct.pack('>L', x)) - -def _write_string(f, s): - if len(s) > 255: - raise ValueError("string exceeds maximum pstring length") - f.write(struct.pack('B', len(s))) - f.write(s) - if len(s) & 1 == 0: - f.write(chr(0)) - -def _write_float(f, x): - import math - if x < 0: - sign = 0x8000 - x = x * -1 - else: - sign = 0 - if x == 0: - expon = 0 - himant = 0 - lomant = 0 - else: - fmant, expon = math.frexp(x) - if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN - expon = sign|0x7FFF - himant = 0 
- lomant = 0 - else: # Finite - expon = expon + 16382 - if expon < 0: # denormalized - fmant = math.ldexp(fmant, expon) - expon = 0 - expon = expon | sign - fmant = math.ldexp(fmant, 32) - fsmant = math.floor(fmant) - himant = long(fsmant) - fmant = math.ldexp(fmant - fsmant, 32) - fsmant = math.floor(fmant) - lomant = long(fsmant) - _write_ushort(f, expon) - _write_ulong(f, himant) - _write_ulong(f, lomant) - -from chunk import Chunk - -class Aifc_read: - # Variables used in this class: - # - # These variables are available to the user though appropriate - # methods of this class: - # _file -- the open file with methods read(), close(), and seek() - # set through the __init__() method - # _nchannels -- the number of audio channels - # available through the getnchannels() method - # _nframes -- the number of audio frames - # available through the getnframes() method - # _sampwidth -- the number of bytes per audio sample - # available through the getsampwidth() method - # _framerate -- the sampling frequency - # available through the getframerate() method - # _comptype -- the AIFF-C compression type ('NONE' if AIFF) - # available through the getcomptype() method - # _compname -- the human-readable AIFF-C compression type - # available through the getcomptype() method - # _markers -- the marks in the audio file - # available through the getmarkers() and getmark() - # methods - # _soundpos -- the position in the audio stream - # available through the tell() method, set through the - # setpos() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _decomp -- the decompressor from builtin module cl - # _comm_chunk_read -- 1 iff the COMM chunk has been read - # _aifc -- 1 iff reading an AIFF-C file - # _ssnd_seek_needed -- 1 iff positioned correctly in audio - # file for readframes() - # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk - # _framesize -- size of one frame in the file - - def initfp(self, 
file): - self._version = 0 - self._decomp = None - self._convert = None - self._markers = [] - self._soundpos = 0 - self._file = file - chunk = Chunk(file) - if chunk.getname() != 'FORM': - raise Error, 'file does not start with FORM id' - formdata = chunk.read(4) - if formdata == 'AIFF': - self._aifc = 0 - elif formdata == 'AIFC': - self._aifc = 1 - else: - raise Error, 'not an AIFF or AIFF-C file' - self._comm_chunk_read = 0 - while 1: - self._ssnd_seek_needed = 1 - try: - chunk = Chunk(self._file) - except EOFError: - break - chunkname = chunk.getname() - if chunkname == 'COMM': - self._read_comm_chunk(chunk) - self._comm_chunk_read = 1 - elif chunkname == 'SSND': - self._ssnd_chunk = chunk - dummy = chunk.read(8) - self._ssnd_seek_needed = 0 - elif chunkname == 'FVER': - self._version = _read_ulong(chunk) - elif chunkname == 'MARK': - self._readmark(chunk) - chunk.skip() - if not self._comm_chunk_read or not self._ssnd_chunk: - raise Error, 'COMM chunk and/or SSND chunk missing' - if self._aifc and self._decomp: - import cl - params = [cl.ORIGINAL_FORMAT, 0, - cl.BITS_PER_COMPONENT, self._sampwidth * 8, - cl.FRAME_RATE, self._framerate] - if self._nchannels == 1: - params[1] = cl.MONO - elif self._nchannels == 2: - params[1] = cl.STEREO_INTERLEAVED - else: - raise Error, 'cannot compress more than 2 channels' - self._decomp.SetParams(params) - - def __init__(self, f): - if type(f) == type(''): - f = __builtin__.open(f, 'rb') - # else, assume it is an open file object already - self.initfp(f) - - # - # User visible methods. 
- # - def getfp(self): - return self._file - - def rewind(self): - self._ssnd_seek_needed = 1 - self._soundpos = 0 - - def close(self): - decomp = self._decomp - try: - if decomp: - self._decomp = None - decomp.CloseDecompressor() - finally: - self._file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def getversion(self): -## return self._version - - def getparams(self): - return self.getnchannels(), self.getsampwidth(), \ - self.getframerate(), self.getnframes(), \ - self.getcomptype(), self.getcompname() - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error, 'marker %r does not exist' % (id,) - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error, 'position not in range' - self._soundpos = pos - self._ssnd_seek_needed = 1 - - def readframes(self, nframes): - if self._ssnd_seek_needed: - self._ssnd_chunk.seek(0) - dummy = self._ssnd_chunk.read(8) - pos = self._soundpos * self._framesize - if pos: - self._ssnd_chunk.seek(pos + 8) - self._ssnd_seek_needed = 0 - if nframes == 0: - return '' - data = self._ssnd_chunk.read(nframes * self._framesize) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels * self._sampwidth) - return data - - # - # Internal methods. 
- # - - def _decomp_data(self, data): - import cl - dummy = self._decomp.SetParam(cl.FRAME_BUFFER_SIZE, - len(data) * 2) - return self._decomp.Decompress(len(data) // self._nchannels, - data) - - def _ulaw2lin(self, data): - import audioop - return audioop.ulaw2lin(data, 2) - - def _adpcm2lin(self, data): - import audioop - if not hasattr(self, '_adpcmstate'): - # first time - self._adpcmstate = None - data, self._adpcmstate = audioop.adpcm2lin(data, 2, - self._adpcmstate) - return data - - def _read_comm_chunk(self, chunk): - self._nchannels = _read_short(chunk) - self._nframes = _read_long(chunk) - self._sampwidth = (_read_short(chunk) + 7) // 8 - self._framerate = int(_read_float(chunk)) - self._framesize = self._nchannels * self._sampwidth - if self._aifc: - #DEBUG: SGI's soundeditor produces a bad size :-( - kludge = 0 - if chunk.chunksize == 18: - kludge = 1 - print 'Warning: bad COMM chunk size' - chunk.chunksize = 23 - #DEBUG end - self._comptype = chunk.read(4) - #DEBUG start - if kludge: - length = ord(chunk.file.read(1)) - if length & 1 == 0: - length = length + 1 - chunk.chunksize = chunk.chunksize + length - chunk.file.seek(-1, 1) - #DEBUG end - self._compname = _read_string(chunk) - if self._comptype != 'NONE': - if self._comptype == 'G722': - try: - import audioop - except ImportError: - pass - else: - self._convert = self._adpcm2lin - self._sampwidth = 2 - return - # for ULAW and ALAW try Compression Library - try: - import cl - except ImportError: - if self._comptype in ('ULAW', 'ulaw'): - try: - import audioop - self._convert = self._ulaw2lin - self._sampwidth = 2 - return - except ImportError: - pass - raise Error, 'cannot read compressed AIFF-C files' - if self._comptype in ('ULAW', 'ulaw'): - scheme = cl.G711_ULAW - elif self._comptype in ('ALAW', 'alaw'): - scheme = cl.G711_ALAW - else: - raise Error, 'unsupported compression type' - self._decomp = cl.OpenDecompressor(scheme) - self._convert = self._decomp_data - self._sampwidth = 2 - else: - 
self._comptype = 'NONE' - self._compname = 'not compressed' - - def _readmark(self, chunk): - nmarkers = _read_short(chunk) - # Some files appear to contain invalid counts. - # Cope with this by testing for EOF. - try: - for i in range(nmarkers): - id = _read_short(chunk) - pos = _read_long(chunk) - name = _read_string(chunk) - if pos or name: - # some files appear to have - # dummy markers consisting of - # a position 0 and name '' - self._markers.append((id, pos, name)) - except EOFError: - print 'Warning: MARK chunk contains only', - print len(self._markers), - if len(self._markers) == 1: print 'marker', - else: print 'markers', - print 'instead of', nmarkers - -class Aifc_write: - # Variables used in this class: - # - # These variables are user settable through appropriate methods - # of this class: - # _file -- the open file with methods write(), close(), tell(), seek() - # set through the __init__() method - # _comptype -- the AIFF-C compression type ('NONE' in AIFF) - # set through the setcomptype() or setparams() method - # _compname -- the human-readable AIFF-C compression type - # set through the setcomptype() or setparams() method - # _nchannels -- the number of audio channels - # set through the setnchannels() or setparams() method - # _sampwidth -- the number of bytes per audio sample - # set through the setsampwidth() or setparams() method - # _framerate -- the sampling frequency - # set through the setframerate() or setparams() method - # _nframes -- the number of audio frames written to the header - # set through the setnframes() or setparams() method - # _aifc -- whether we're writing an AIFF-C file or an AIFF file - # set through the aifc() method, reset through the - # aiff() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _comp -- the compressor from builtin module cl - # _nframeswritten -- the number of audio frames actually written - # _datalength -- the size of the audio samples written 
to the header - # _datawritten -- the size of the audio samples actually written - - def __init__(self, f): - if type(f) == type(''): - filename = f - f = __builtin__.open(f, 'wb') - else: - # else, assume it is an open file object already - filename = '???' - self.initfp(f) - if filename[-5:] == '.aiff': - self._aifc = 0 - else: - self._aifc = 1 - - def initfp(self, file): - self._file = file - self._version = _AIFC_version - self._comptype = 'NONE' - self._compname = 'not compressed' - self._comp = None - self._convert = None - self._nchannels = 0 - self._sampwidth = 0 - self._framerate = 0 - self._nframes = 0 - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._markers = [] - self._marklength = 0 - self._aifc = 1 # AIFF-C is default - - def __del__(self): - if self._file: - self.close() - - # - # User visible methods. - # - def aiff(self): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - self._aifc = 0 - - def aifc(self): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - self._aifc = 1 - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if nchannels < 1: - raise Error, 'bad # of channels' - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error, 'number of channels not set' - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if sampwidth < 1 or sampwidth > 4: - raise Error, 'bad sample width' - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error, 'sample width not set' - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if framerate <= 0: - raise Error, 'bad frame rate' 
- self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error, 'frame rate not set' - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if comptype not in ('NONE', 'ULAW', 'ulaw', 'ALAW', 'alaw', 'G722'): - raise Error, 'unsupported compression type' - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def setversion(self, version): -## if self._nframeswritten: -## raise Error, 'cannot change parameters after starting to write' -## self._version = version - - def setparams(self, info): - nchannels, sampwidth, framerate, nframes, comptype, compname = info - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if comptype not in ('NONE', 'ULAW', 'ulaw', 'ALAW', 'alaw', 'G722'): - raise Error, 'unsupported compression type' - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error, 'not all parameters set' - return self._nchannels, self._sampwidth, self._framerate, \ - self._nframes, self._comptype, self._compname - - def setmark(self, id, pos, name): - if id <= 0: - raise Error, 'marker ID must be > 0' - if pos < 0: - raise Error, 'marker position must be >= 0' - if type(name) != type(''): - raise Error, 'marker name must be a string' - for i in range(len(self._markers)): - if id == self._markers[i][0]: - self._markers[i] = id, pos, name - return - 
self._markers.append((id, pos, name)) - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error, 'marker %r does not exist' % (id,) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file is None: - return - try: - self._ensure_header_written(0) - if self._datawritten & 1: - # quick pad to even size - self._file.write(chr(0)) - self._datawritten = self._datawritten + 1 - self._writemarkers() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten or \ - self._marklength: - self._patchheader() - if self._comp: - self._comp.CloseCompressor() - self._comp = None - finally: - # Prevent ref cycles - self._convert = None - f = self._file - self._file = None - f.close() - - # - # Internal methods. 
- # - - def _comp_data(self, data): - import cl - dummy = self._comp.SetParam(cl.FRAME_BUFFER_SIZE, len(data)) - dummy = self._comp.SetParam(cl.COMPRESSED_BUFFER_SIZE, len(data)) - return self._comp.Compress(self._nframes, data) - - def _lin2ulaw(self, data): - import audioop - return audioop.lin2ulaw(data, 2) - - def _lin2adpcm(self, data): - import audioop - if not hasattr(self, '_adpcmstate'): - self._adpcmstate = None - data, self._adpcmstate = audioop.lin2adpcm(data, 2, - self._adpcmstate) - return data - - def _ensure_header_written(self, datasize): - if not self._nframeswritten: - if self._comptype in ('ULAW', 'ulaw', 'ALAW', 'alaw'): - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error, 'sample width must be 2 when compressing with ULAW or ALAW' - if self._comptype == 'G722': - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error, 'sample width must be 2 when compressing with G7.22 (ADPCM)' - if not self._nchannels: - raise Error, '# channels not specified' - if not self._sampwidth: - raise Error, 'sample width not specified' - if not self._framerate: - raise Error, 'sampling rate not specified' - self._write_header(datasize) - - def _init_compression(self): - if self._comptype == 'G722': - self._convert = self._lin2adpcm - return - try: - import cl - except ImportError: - if self._comptype in ('ULAW', 'ulaw'): - try: - import audioop - self._convert = self._lin2ulaw - return - except ImportError: - pass - raise Error, 'cannot write compressed AIFF-C files' - if self._comptype in ('ULAW', 'ulaw'): - scheme = cl.G711_ULAW - elif self._comptype in ('ALAW', 'alaw'): - scheme = cl.G711_ALAW - else: - raise Error, 'unsupported compression type' - self._comp = cl.OpenCompressor(scheme) - params = [cl.ORIGINAL_FORMAT, 0, - cl.BITS_PER_COMPONENT, self._sampwidth * 8, - cl.FRAME_RATE, self._framerate, - cl.FRAME_BUFFER_SIZE, 100, - cl.COMPRESSED_BUFFER_SIZE, 100] - if self._nchannels == 1: - 
params[1] = cl.MONO - elif self._nchannels == 2: - params[1] = cl.STEREO_INTERLEAVED - else: - raise Error, 'cannot compress more than 2 channels' - self._comp.SetParams(params) - # the compressor produces a header which we ignore - dummy = self._comp.Compress(0, '') - self._convert = self._comp_data - - def _write_header(self, initlength): - if self._aifc and self._comptype != 'NONE': - self._init_compression() - self._file.write('FORM') - if not self._nframes: - self._nframes = initlength // (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - if self._datalength & 1: - self._datalength = self._datalength + 1 - if self._aifc: - if self._comptype in ('ULAW', 'ulaw', 'ALAW', 'alaw'): - self._datalength = self._datalength // 2 - if self._datalength & 1: - self._datalength = self._datalength + 1 - elif self._comptype == 'G722': - self._datalength = (self._datalength + 3) // 4 - if self._datalength & 1: - self._datalength = self._datalength + 1 - try: - self._form_length_pos = self._file.tell() - except (AttributeError, IOError): - self._form_length_pos = None - commlength = self._write_form_length(self._datalength) - if self._aifc: - self._file.write('AIFC') - self._file.write('FVER') - _write_ulong(self._file, 4) - _write_ulong(self._file, self._version) - else: - self._file.write('AIFF') - self._file.write('COMM') - _write_ulong(self._file, commlength) - _write_short(self._file, self._nchannels) - if self._form_length_pos is not None: - self._nframes_pos = self._file.tell() - _write_ulong(self._file, self._nframes) - if self._comptype in ('ULAW', 'ulaw', 'ALAW', 'alaw', 'G722'): - _write_short(self._file, 8) - else: - _write_short(self._file, self._sampwidth * 8) - _write_float(self._file, self._framerate) - if self._aifc: - self._file.write(self._comptype) - _write_string(self._file, self._compname) - self._file.write('SSND') - if self._form_length_pos is not None: - self._ssnd_length_pos = self._file.tell() 
- _write_ulong(self._file, self._datalength + 8) - _write_ulong(self._file, 0) - _write_ulong(self._file, 0) - - def _write_form_length(self, datalength): - if self._aifc: - commlength = 18 + 5 + len(self._compname) - if commlength & 1: - commlength = commlength + 1 - verslength = 12 - else: - commlength = 18 - verslength = 0 - _write_ulong(self._file, 4 + verslength + self._marklength + \ - 8 + commlength + 16 + datalength) - return commlength - - def _patchheader(self): - curpos = self._file.tell() - if self._datawritten & 1: - datalength = self._datawritten + 1 - self._file.write(chr(0)) - else: - datalength = self._datawritten - if datalength == self._datalength and \ - self._nframes == self._nframeswritten and \ - self._marklength == 0: - self._file.seek(curpos, 0) - return - self._file.seek(self._form_length_pos, 0) - dummy = self._write_form_length(datalength) - self._file.seek(self._nframes_pos, 0) - _write_ulong(self._file, self._nframeswritten) - self._file.seek(self._ssnd_length_pos, 0) - _write_ulong(self._file, datalength + 8) - self._file.seek(curpos, 0) - self._nframes = self._nframeswritten - self._datalength = datalength - - def _writemarkers(self): - if len(self._markers) == 0: - return - self._file.write('MARK') - length = 2 - for marker in self._markers: - id, pos, name = marker - length = length + len(name) + 1 + 6 - if len(name) & 1 == 0: - length = length + 1 - _write_ulong(self._file, length) - self._marklength = length + 8 - _write_short(self._file, len(self._markers)) - for marker in self._markers: - id, pos, name = marker - _write_short(self._file, id) - _write_ulong(self._file, pos) - _write_string(self._file, name) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Aifc_read(f) - elif mode in ('w', 'wb'): - return Aifc_write(f) - else: - raise Error, "mode must be 'r', 'rb', 'w', or 'wb'" - -openfp = open # B/W compatibility - -if __name__ == 
'__main__': - import sys - if not sys.argv[1:]: - sys.argv.append('/usr/demos/data/audio/bach.aiff') - fn = sys.argv[1] - f = open(fn, 'r') - try: - print "Reading", fn - print "nchannels =", f.getnchannels() - print "nframes =", f.getnframes() - print "sampwidth =", f.getsampwidth() - print "framerate =", f.getframerate() - print "comptype =", f.getcomptype() - print "compname =", f.getcompname() - if sys.argv[2:]: - gn = sys.argv[2] - print "Writing", gn - g = open(gn, 'w') - try: - g.setparams(f.getparams()) - while 1: - data = f.readframes(1024) - if not data: - break - g.writeframes(data) - finally: - g.close() - print "Done." - finally: - f.close() diff --git a/python/Lib/antigravity.py b/python/Lib/antigravity.py deleted file mode 100644 index 7fb7d8282a..0000000000 --- a/python/Lib/antigravity.py +++ /dev/null @@ -1,4 +0,0 @@ - -import webbrowser - -webbrowser.open("http://xkcd.com/353/") diff --git a/python/Lib/anydbm.py b/python/Lib/anydbm.py deleted file mode 100755 index ba7e90510a..0000000000 --- a/python/Lib/anydbm.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Generic interface to all dbm clones. - -Instead of - - import dbm - d = dbm.open(file, 'w', 0666) - -use - - import anydbm - d = anydbm.open(file, 'w') - -The returned object is a dbhash, gdbm, dbm or dumbdbm object, -dependent on the type of database being opened (determined by whichdb -module) in the case of an existing dbm. If the dbm does not exist and -the create or new flag ('c' or 'n') was specified, the dbm type will -be determined by the availability of the modules (tested in the above -order). - -It has the following interface (key and data are strings): - - d[key] = data # store data at key (may override data at - # existing key) - data = d[key] # retrieve data at key (raise KeyError if no - # such key) - del d[key] # delete data stored at key (raises KeyError - # if no such key) - flag = key in d # true if the key exists - list = d.keys() # return a list of all existing keys (slow!) 
- -Future versions may change the order in which implementations are -tested for existence, and add interfaces to other dbm-like -implementations. -""" - -class error(Exception): - pass - -_names = ['dbhash', 'gdbm', 'dbm', 'dumbdbm'] -_errors = [error] -_defaultmod = None - -for _name in _names: - try: - _mod = __import__(_name) - except ImportError: - continue - if not _defaultmod: - _defaultmod = _mod - _errors.append(_mod.error) - -if not _defaultmod: - raise ImportError, "no dbm clone found; tried %s" % _names - -error = tuple(_errors) - -def open(file, flag='r', mode=0666): - """Open or create database at path given by *file*. - - Optional argument *flag* can be 'r' (default) for read-only access, 'w' - for read-write access of an existing database, 'c' for read-write access - to a new or existing database, and 'n' for read-write access to a new - database. - - Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it - only if it doesn't exist; and 'n' always creates a new database. - """ - - # guess the type of an existing database - from whichdb import whichdb - result=whichdb(file) - if result is None: - # db doesn't exist - if 'c' in flag or 'n' in flag: - # file doesn't exist and the new - # flag was used so use default type - mod = _defaultmod - else: - raise error, "need 'c' or 'n' flag to open new db" - elif result == "": - # db type cannot be determined - raise error, "db type could not be determined" - else: - mod = __import__(result) - return mod.open(file, flag, mode) diff --git a/python/Lib/argparse.py b/python/Lib/argparse.py deleted file mode 100644 index 1b233b85d2..0000000000 --- a/python/Lib/argparse.py +++ /dev/null @@ -1,2374 +0,0 @@ -# Author: Steven J. Bethard . 
- -"""Command-line parsing library - -This module is an optparse-inspired command-line parsing library that: - - - handles both optional and positional arguments - - produces highly informative usage messages - - supports parsers that dispatch to sub-parsers - -The following is a simple usage example that sums integers from the -command-line and writes the result to a file:: - - parser = argparse.ArgumentParser( - description='sum the integers at the command line') - parser.add_argument( - 'integers', metavar='int', nargs='+', type=int, - help='an integer to be summed') - parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), - help='the file where the sum should be written') - args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() - -The module contains the following public classes: - - - ArgumentParser -- The main entry point for command-line parsing. As the - example above shows, the add_argument() method is used to populate - the parser with actions for optional and positional arguments. Then - the parse_args() method is invoked to convert the args at the - command-line into an object with attributes. - - - ArgumentError -- The exception raised by ArgumentParser objects when - there are errors with the parser's actions. Errors raised while - parsing the command-line are caught by ArgumentParser and emitted - as command-line messages. - - - FileType -- A factory for defining types of files to be created. As the - example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. - - - Action -- The base class for parser actions. Typically actions are - selected by passing strings like 'store_true' or 'append_const' to - the action= argument of add_argument(). However, for greater - customization of ArgumentParser actions, subclasses of Action may - be defined and passed as the action= argument. 
- - - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter, - ArgumentDefaultsHelpFormatter -- Formatter classes which - may be passed as the formatter_class= argument to the - ArgumentParser constructor. HelpFormatter is the default, - RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser - not to change the formatting for help text, and - ArgumentDefaultsHelpFormatter adds information about argument defaults - to the help. - -All other classes in this module are considered implementation details. -(Also note that HelpFormatter and RawDescriptionHelpFormatter are only -considered public as object names -- the API of the formatter objects is -still considered an implementation detail.) -""" - -__version__ = '1.1' -__all__ = [ - 'ArgumentParser', - 'ArgumentError', - 'ArgumentTypeError', - 'FileType', - 'HelpFormatter', - 'ArgumentDefaultsHelpFormatter', - 'RawDescriptionHelpFormatter', - 'RawTextHelpFormatter', - 'Namespace', - 'Action', - 'ONE_OR_MORE', - 'OPTIONAL', - 'PARSER', - 'REMAINDER', - 'SUPPRESS', - 'ZERO_OR_MORE', -] - - -import collections as _collections -import copy as _copy -import os as _os -import re as _re -import sys as _sys -import textwrap as _textwrap - -from gettext import gettext as _ - - -def _callable(obj): - return hasattr(obj, '__call__') or hasattr(obj, '__bases__') - - -SUPPRESS = '==SUPPRESS==' - -OPTIONAL = '?' -ZERO_OR_MORE = '*' -ONE_OR_MORE = '+' -PARSER = 'A...' -REMAINDER = '...' -_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args' - -# ============================= -# Utility functions and classes -# ============================= - -class _AttributeHolder(object): - """Abstract base class that provides __repr__. - - The __repr__ method returns a string in the format:: - ClassName(attr=name, attr=name, ...) - The attributes are determined either by a class-level attribute, - '_kwarg_names', or by inspecting the instance __dict__. 
- """ - - def __repr__(self): - type_name = type(self).__name__ - arg_strings = [] - for arg in self._get_args(): - arg_strings.append(repr(arg)) - for name, value in self._get_kwargs(): - arg_strings.append('%s=%r' % (name, value)) - return '%s(%s)' % (type_name, ', '.join(arg_strings)) - - def _get_kwargs(self): - return sorted(self.__dict__.items()) - - def _get_args(self): - return [] - - -def _ensure_value(namespace, name, value): - if getattr(namespace, name, None) is None: - setattr(namespace, name, value) - return getattr(namespace, name) - - -# =============== -# Formatting Help -# =============== - -class HelpFormatter(object): - """Formatter for generating usage messages and argument help strings. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - - # default setting for width - if width is None: - try: - width = int(_os.environ['COLUMNS']) - except (KeyError, ValueError): - width = 80 - width -= 2 - - self._prog = prog - self._indent_increment = indent_increment - self._max_help_position = max_help_position - self._max_help_position = min(max_help_position, - max(width - 20, indent_increment * 2)) - self._width = width - - self._current_indent = 0 - self._level = 0 - self._action_max_length = 0 - - self._root_section = self._Section(self, None) - self._current_section = self._root_section - - self._whitespace_matcher = _re.compile(r'\s+') - self._long_break_matcher = _re.compile(r'\n\n\n+') - - # =============================== - # Section and indentation methods - # =============================== - def _indent(self): - self._current_indent += self._indent_increment - self._level += 1 - - def _dedent(self): - self._current_indent -= self._indent_increment - assert self._current_indent >= 0, 'Indent decreased below 0.' 
- self._level -= 1 - - class _Section(object): - - def __init__(self, formatter, parent, heading=None): - self.formatter = formatter - self.parent = parent - self.heading = heading - self.items = [] - - def format_help(self): - # format the indented section - if self.parent is not None: - self.formatter._indent() - join = self.formatter._join_parts - for func, args in self.items: - func(*args) - item_help = join([func(*args) for func, args in self.items]) - if self.parent is not None: - self.formatter._dedent() - - # return nothing if the section was empty - if not item_help: - return '' - - # add the heading if the section was non-empty - if self.heading is not SUPPRESS and self.heading is not None: - current_indent = self.formatter._current_indent - heading = '%*s%s:\n' % (current_indent, '', self.heading) - else: - heading = '' - - # join the section-initial newline, the heading and the help - return join(['\n', heading, item_help, '\n']) - - def _add_item(self, func, args): - self._current_section.items.append((func, args)) - - # ======================== - # Message building methods - # ======================== - def start_section(self, heading): - self._indent() - section = self._Section(self, self._current_section, heading) - self._add_item(section.format_help, []) - self._current_section = section - - def end_section(self): - self._current_section = self._current_section.parent - self._dedent() - - def add_text(self, text): - if text is not SUPPRESS and text is not None: - self._add_item(self._format_text, [text]) - - def add_usage(self, usage, actions, groups, prefix=None): - if usage is not SUPPRESS: - args = usage, actions, groups, prefix - self._add_item(self._format_usage, args) - - def add_argument(self, action): - if action.help is not SUPPRESS: - - # find all invocations - get_invocation = self._format_action_invocation - invocations = [get_invocation(action)] - for subaction in self._iter_indented_subactions(action): - 
invocations.append(get_invocation(subaction)) - - # update the maximum item length - invocation_length = max([len(s) for s in invocations]) - action_length = invocation_length + self._current_indent - self._action_max_length = max(self._action_max_length, - action_length) - - # add the item to the list - self._add_item(self._format_action, [action]) - - def add_arguments(self, actions): - for action in actions: - self.add_argument(action) - - # ======================= - # Help-formatting methods - # ======================= - def format_help(self): - help = self._root_section.format_help() - if help: - help = self._long_break_matcher.sub('\n\n', help) - help = help.strip('\n') + '\n' - return help - - def _join_parts(self, part_strings): - return ''.join([part - for part in part_strings - if part and part is not SUPPRESS]) - - def _format_usage(self, usage, actions, groups, prefix): - if prefix is None: - prefix = _('usage: ') - - # if usage is specified, use that - if usage is not None: - usage = usage % dict(prog=self._prog) - - # if no optionals or positionals are available, usage is just prog - elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) - - # if optionals and positionals are available, calculate usage - elif usage is None: - prog = '%(prog)s' % dict(prog=self._prog) - - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - - # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) - - # wrap the usage parts if it's too long - text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: - - # break usage into wrappable parts - part_regexp = r'\(.*?\)+|\[.*?\]+|\S+' - opt_usage = format(optionals, groups) - pos_usage = format(positionals, 
groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage - - # helper for wrapping lines - def get_lines(parts, indent, prefix=None): - lines = [] - line = [] - if prefix is not None: - line_len = len(prefix) - 1 - else: - line_len = len(indent) - 1 - for part in parts: - if line_len + 1 + len(part) > text_width and line: - lines.append(indent + ' '.join(line)) - line = [] - line_len = len(indent) - 1 - line.append(part) - line_len += len(part) + 1 - if line: - lines.append(indent + ' '.join(line)) - if prefix is not None: - lines[0] = lines[0][len(indent):] - return lines - - # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) - if opt_parts: - lines = get_lines([prog] + opt_parts, indent, prefix) - lines.extend(get_lines(pos_parts, indent)) - elif pos_parts: - lines = get_lines([prog] + pos_parts, indent, prefix) - else: - lines = [prog] - - # if prog is long, put it on its own line - else: - indent = ' ' * len(prefix) - parts = opt_parts + pos_parts - lines = get_lines(parts, indent) - if len(lines) > 1: - lines = [] - lines.extend(get_lines(opt_parts, indent)) - lines.extend(get_lines(pos_parts, indent)) - lines = [prog] + lines - - # join lines into usage - usage = '\n'.join(lines) - - # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) - - def _format_actions_usage(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} - for group in groups: - try: - start = actions.index(group._group_actions[0]) - except ValueError: - continue - else: - end = start + len(group._group_actions) - if actions[start:end] == group._group_actions: - for action in group._group_actions: - group_actions.add(action) - if not group.required: - if start in inserts: - inserts[start] 
+= ' [' - else: - inserts[start] = '[' - inserts[end] = ']' - else: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' - - # collect all actions format strings - parts = [] - for i, action in enumerate(actions): - - # suppressed arguments are marked with None - # remove | separators for suppressed arguments - if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) - - # produce all arg strings - elif not action.option_strings: - part = self._format_args(action, action.dest) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # add the action string to the list - parts.append(part) - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] - - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = '%s' % option_string - - # if the Optional takes a value, format is: - # -s ARGS or --long ARGS - else: - default = action.dest.upper() - args_string = self._format_args(action, default) - part = '%s %s' % (option_string, args_string) - - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part - - # add the action string to the list - parts.append(part) - - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) - - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - 
text = _re.sub(r'\(([^|]*)\)', r'\1', text) - text = text.strip() - - # return the text - return text - - def _format_text(self, text): - if '%(prog)' in text: - text = text % dict(prog=self._prog) - text_width = max(self._width - self._current_indent, 11) - indent = ' ' * self._current_indent - return self._fill_text(text, text_width, indent) + '\n\n' - - def _format_action(self, action): - # determine the required width and the entry label - help_position = min(self._action_max_length + 2, - self._max_help_position) - help_width = max(self._width - help_position, 11) - action_width = help_position - self._current_indent - 2 - action_header = self._format_action_invocation(action) - - # ho nelp; start on same line and add a final newline - if not action.help: - tup = self._current_indent, '', action_header - action_header = '%*s%s\n' % tup - - # short action name; start on the same line and pad two spaces - elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header - action_header = '%*s%-*s ' % tup - indent_first = 0 - - # long action name; start on the next line - else: - tup = self._current_indent, '', action_header - action_header = '%*s%s\n' % tup - indent_first = help_position - - # collect the pieces of the action help - parts = [action_header] - - # if there was help for the action, add lines of help text - if action.help: - help_text = self._expand_help(action) - help_lines = self._split_lines(help_text, help_width) - parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) - for line in help_lines[1:]: - parts.append('%*s%s\n' % (help_position, '', line)) - - # or add a newline if the description doesn't end with one - elif not action_header.endswith('\n'): - parts.append('\n') - - # if there are any sub-actions, add their help as well - for subaction in self._iter_indented_subactions(action): - parts.append(self._format_action(subaction)) - - # return a single string - return self._join_parts(parts) - - def 
_format_action_invocation(self, action): - if not action.option_strings: - metavar, = self._metavar_formatter(action, action.dest)(1) - return metavar - - else: - parts = [] - - # if the Optional doesn't take a value, format is: - # -s, --long - if action.nargs == 0: - parts.extend(action.option_strings) - - # if the Optional takes a value, format is: - # -s ARGS, --long ARGS - else: - default = action.dest.upper() - args_string = self._format_args(action, default) - for option_string in action.option_strings: - parts.append('%s %s' % (option_string, args_string)) - - return ', '.join(parts) - - def _metavar_formatter(self, action, default_metavar): - if action.metavar is not None: - result = action.metavar - elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) - else: - result = default_metavar - - def format(tuple_size): - if isinstance(result, tuple): - return result - else: - return (result, ) * tuple_size - return format - - def _format_args(self, action, default_metavar): - get_metavar = self._metavar_formatter(action, default_metavar) - if action.nargs is None: - result = '%s' % get_metavar(1) - elif action.nargs == OPTIONAL: - result = '[%s]' % get_metavar(1) - elif action.nargs == ZERO_OR_MORE: - result = '[%s [%s ...]]' % get_metavar(2) - elif action.nargs == ONE_OR_MORE: - result = '%s [%s ...]' % get_metavar(2) - elif action.nargs == REMAINDER: - result = '...' - elif action.nargs == PARSER: - result = '%s ...' 
% get_metavar(1) - else: - formats = ['%s' for _ in range(action.nargs)] - result = ' '.join(formats) % get_metavar(action.nargs) - return result - - def _expand_help(self, action): - params = dict(vars(action), prog=self._prog) - for name in list(params): - if params[name] is SUPPRESS: - del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ - if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str - return self._get_help_string(action) % params - - def _iter_indented_subactions(self, action): - try: - get_subactions = action._get_subactions - except AttributeError: - pass - else: - self._indent() - for subaction in get_subactions(): - yield subaction - self._dedent() - - def _split_lines(self, text, width): - text = self._whitespace_matcher.sub(' ', text).strip() - return _textwrap.wrap(text, width) - - def _fill_text(self, text, width, indent): - text = self._whitespace_matcher.sub(' ', text).strip() - return _textwrap.fill(text, width, initial_indent=indent, - subsequent_indent=indent) - - def _get_help_string(self, action): - return action.help - - -class RawDescriptionHelpFormatter(HelpFormatter): - """Help message formatter which retains any formatting in descriptions. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _fill_text(self, text, width, indent): - return ''.join([indent + line for line in text.splitlines(True)]) - - -class RawTextHelpFormatter(RawDescriptionHelpFormatter): - """Help message formatter which retains formatting of all help text. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. 
- """ - - def _split_lines(self, text, width): - return text.splitlines() - - -class ArgumentDefaultsHelpFormatter(HelpFormatter): - """Help message formatter which adds default values to argument help. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _get_help_string(self, action): - help = action.help - if '%(default)' not in action.help: - if action.default is not SUPPRESS: - defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] - if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' - return help - - -# ===================== -# Options and Arguments -# ===================== - -def _get_action_name(argument): - if argument is None: - return None - elif argument.option_strings: - return '/'.join(argument.option_strings) - elif argument.metavar not in (None, SUPPRESS): - return argument.metavar - elif argument.dest not in (None, SUPPRESS): - return argument.dest - else: - return None - - -class ArgumentError(Exception): - """An error from creating or using an argument (optional or positional). - - The string value of this exception is the message, augmented with - information about the argument that caused it. - """ - - def __init__(self, argument, message): - self.argument_name = _get_action_name(argument) - self.message = message - - def __str__(self): - if self.argument_name is None: - format = '%(message)s' - else: - format = 'argument %(argument_name)s: %(message)s' - return format % dict(message=self.message, - argument_name=self.argument_name) - - -class ArgumentTypeError(Exception): - """An error from trying to convert a command line string to a type.""" - pass - - -# ============== -# Action classes -# ============== - -class Action(_AttributeHolder): - """Information about how to convert command line strings to Python objects. 
- - Action objects are used by an ArgumentParser to represent the information - needed to parse a single argument from one or more strings from the - command line. The keyword arguments to the Action constructor are also - all attributes of Action instances. - - Keyword Arguments: - - - option_strings -- A list of command-line option strings which - should be associated with this action. - - - dest -- The name of the attribute to hold the created object(s) - - - nargs -- The number of command-line arguments that should be - consumed. By default, one argument will be consumed and a single - value will be produced. Other values include: - - N (an integer) consumes N arguments (and produces a list) - - '?' consumes zero or one arguments - - '*' consumes zero or more arguments (and produces a list) - - '+' consumes one or more arguments (and produces a list) - Note that the difference between the default and nargs=1 is that - with the default, a single value will be produced, while with - nargs=1, a list containing a single value will be produced. - - - const -- The value to be produced if the option is specified and the - option uses an action that takes no values. - - - default -- The value to be produced if the option is not specified. - - - type -- A callable that accepts a single string argument, and - returns the converted value. The standard Python types str, int, - float, and complex are useful examples of such callables. If None, - str is used. - - - choices -- A container of values that should be allowed. If not None, - after a command-line argument has been converted to the appropriate - type, an exception will be raised if it is not a member of this - collection. - - - required -- True if the action must always be specified at the - command line. This is only meaningful for optional command-line - arguments. - - - help -- The help string describing the argument. - - - metavar -- The name to be used for the option's argument with the - help string. 
If None, the 'dest' value will be used as the name. - """ - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - self.option_strings = option_strings - self.dest = dest - self.nargs = nargs - self.const = const - self.default = default - self.type = type - self.choices = choices - self.required = required - self.help = help - self.metavar = metavar - - def _get_kwargs(self): - names = [ - 'option_strings', - 'dest', - 'nargs', - 'const', - 'default', - 'type', - 'choices', - 'help', - 'metavar', - ] - return [(name, getattr(self, name)) for name in names] - - def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) - - -class _StoreAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for store actions must be > 0; if you ' - 'have nothing to store, actions such as store ' - 'true or store const may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_StoreAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, values) - - -class _StoreConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): - super(_StoreConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help) - - def __call__(self, parser, 
namespace, values, option_string=None): - setattr(namespace, self.dest, self.const) - - -class _StoreTrueAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=False, - required=False, - help=None): - super(_StoreTrueAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=True, - default=default, - required=required, - help=help) - - -class _StoreFalseAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=True, - required=False, - help=None): - super(_StoreFalseAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=False, - default=default, - required=required, - help=help) - - -class _AppendAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for append actions must be > 0; if arg ' - 'strings are not supplying the value to append, ' - 'the append const action may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_AppendAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - items = _copy.copy(_ensure_value(namespace, self.dest, [])) - items.append(values) - setattr(namespace, self.dest, items) - - -class _AppendConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): - super(_AppendConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, 
values, option_string=None): - items = _copy.copy(_ensure_value(namespace, self.dest, [])) - items.append(self.const) - setattr(namespace, self.dest, items) - - -class _CountAction(Action): - - def __init__(self, - option_strings, - dest, - default=None, - required=False, - help=None): - super(_CountAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - default=default, - required=required, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - new_count = _ensure_value(namespace, self.dest, 0) + 1 - setattr(namespace, self.dest, new_count) - - -class _HelpAction(Action): - - def __init__(self, - option_strings, - dest=SUPPRESS, - default=SUPPRESS, - help=None): - super(_HelpAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - parser.print_help() - parser.exit() - - -class _VersionAction(Action): - - def __init__(self, - option_strings, - version=None, - dest=SUPPRESS, - default=SUPPRESS, - help="show program's version number and exit"): - super(_VersionAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - self.version = version - - def __call__(self, parser, namespace, values, option_string=None): - version = self.version - if version is None: - version = parser.version - formatter = parser._get_formatter() - formatter.add_text(version) - parser.exit(message=formatter.format_help()) - - -class _SubParsersAction(Action): - - class _ChoicesPseudoAction(Action): - - def __init__(self, name, help): - sup = super(_SubParsersAction._ChoicesPseudoAction, self) - sup.__init__(option_strings=[], dest=name, help=help) - - def __init__(self, - option_strings, - prog, - parser_class, - dest=SUPPRESS, - help=None, - metavar=None): - - self._prog_prefix = prog - self._parser_class = parser_class - self._name_parser_map = 
_collections.OrderedDict() - self._choices_actions = [] - - super(_SubParsersAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=PARSER, - choices=self._name_parser_map, - help=help, - metavar=metavar) - - def add_parser(self, name, **kwargs): - # set prog from the existing prefix - if kwargs.get('prog') is None: - kwargs['prog'] = '%s %s' % (self._prog_prefix, name) - - # create a pseudo-action to hold the choice help - if 'help' in kwargs: - help = kwargs.pop('help') - choice_action = self._ChoicesPseudoAction(name, help) - self._choices_actions.append(choice_action) - - # create the parser and add it to the map - parser = self._parser_class(**kwargs) - self._name_parser_map[name] = parser - return parser - - def _get_subactions(self): - return self._choices_actions - - def __call__(self, parser, namespace, values, option_string=None): - parser_name = values[0] - arg_strings = values[1:] - - # set the parser name if requested - if self.dest is not SUPPRESS: - setattr(namespace, self.dest, parser_name) - - # select the parser - try: - parser = self._name_parser_map[parser_name] - except KeyError: - tup = parser_name, ', '.join(self._name_parser_map) - msg = _('unknown parser %r (choices: %s)') % tup - raise ArgumentError(self, msg) - - # parse all the remaining options into the namespace - # store any unrecognized options on the object, so that the top - # level parser can decide what to do with them - - # In case this subparser defines new defaults, we parse them - # in a new namespace object and then update the original - # namespace for the relevant parts. 
- subnamespace, arg_strings = parser.parse_known_args(arg_strings, None) - for key, value in vars(subnamespace).items(): - setattr(namespace, key, value) - - if arg_strings: - vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) - getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) - - -# ============== -# Type classes -# ============== - -class FileType(object): - """Factory for creating file object types - - Instances of FileType are typically passed as type= arguments to the - ArgumentParser add_argument() method. - - Keyword Arguments: - - mode -- A string indicating how the file is to be opened. Accepts the - same values as the builtin open() function. - - bufsize -- The file's desired buffer size. Accepts the same values as - the builtin open() function. - """ - - def __init__(self, mode='r', bufsize=-1): - self._mode = mode - self._bufsize = bufsize - - def __call__(self, string): - # the special argument "-" means sys.std{in,out} - if string == '-': - if 'r' in self._mode: - return _sys.stdin - elif 'w' in self._mode: - return _sys.stdout - else: - msg = _('argument "-" with mode %r') % self._mode - raise ValueError(msg) - - # all other arguments are used as file names - try: - return open(string, self._mode, self._bufsize) - except IOError as e: - message = _("can't open '%s': %s") - raise ArgumentTypeError(message % (string, e)) - - def __repr__(self): - args = self._mode, self._bufsize - args_str = ', '.join(repr(arg) for arg in args if arg != -1) - return '%s(%s)' % (type(self).__name__, args_str) - -# =========================== -# Optional and Positional Parsing -# =========================== - -class Namespace(_AttributeHolder): - """Simple object for storing attributes. - - Implements equality by attribute names and values, and provides a simple - string representation. 
- """ - - def __init__(self, **kwargs): - for name in kwargs: - setattr(self, name, kwargs[name]) - - __hash__ = None - - def __eq__(self, other): - if not isinstance(other, Namespace): - return NotImplemented - return vars(self) == vars(other) - - def __ne__(self, other): - if not isinstance(other, Namespace): - return NotImplemented - return not (self == other) - - def __contains__(self, key): - return key in self.__dict__ - - -class _ActionsContainer(object): - - def __init__(self, - description, - prefix_chars, - argument_default, - conflict_handler): - super(_ActionsContainer, self).__init__() - - self.description = description - self.argument_default = argument_default - self.prefix_chars = prefix_chars - self.conflict_handler = conflict_handler - - # set up registries - self._registries = {} - - # register actions - self.register('action', None, _StoreAction) - self.register('action', 'store', _StoreAction) - self.register('action', 'store_const', _StoreConstAction) - self.register('action', 'store_true', _StoreTrueAction) - self.register('action', 'store_false', _StoreFalseAction) - self.register('action', 'append', _AppendAction) - self.register('action', 'append_const', _AppendConstAction) - self.register('action', 'count', _CountAction) - self.register('action', 'help', _HelpAction) - self.register('action', 'version', _VersionAction) - self.register('action', 'parsers', _SubParsersAction) - - # raise an exception if the conflict handler is invalid - self._get_handler() - - # action storage - self._actions = [] - self._option_string_actions = {} - - # groups - self._action_groups = [] - self._mutually_exclusive_groups = [] - - # defaults storage - self._defaults = {} - - # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') - - # whether or not there are any optionals that look like negative - # numbers -- uses a list so it can be shared and edited - 
self._has_negative_number_optionals = [] - - # ==================== - # Registration methods - # ==================== - def register(self, registry_name, value, object): - registry = self._registries.setdefault(registry_name, {}) - registry[value] = object - - def _registry_get(self, registry_name, value, default=None): - return self._registries[registry_name].get(value, default) - - # ================================== - # Namespace default accessor methods - # ================================== - def set_defaults(self, **kwargs): - self._defaults.update(kwargs) - - # if these defaults match any existing arguments, replace - # the previous default on the object with the new one - for action in self._actions: - if action.dest in kwargs: - action.default = kwargs[action.dest] - - def get_default(self, dest): - for action in self._actions: - if action.dest == dest and action.default is not None: - return action.default - return self._defaults.get(dest, None) - - - # ======================= - # Adding argument actions - # ======================= - def add_argument(self, *args, **kwargs): - """ - add_argument(dest, ..., name=value, ...) - add_argument(option_string, option_string, ..., name=value, ...) 
- """ - - # if no positional args are supplied or only one is supplied and - # it doesn't look like an option string, parse a positional - # argument - chars = self.prefix_chars - if not args or len(args) == 1 and args[0][0] not in chars: - if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') - kwargs = self._get_positional_kwargs(*args, **kwargs) - - # otherwise, we're adding an optional argument - else: - kwargs = self._get_optional_kwargs(*args, **kwargs) - - # if no default was supplied, use the parser-level default - if 'default' not in kwargs: - dest = kwargs['dest'] - if dest in self._defaults: - kwargs['default'] = self._defaults[dest] - elif self.argument_default is not None: - kwargs['default'] = self.argument_default - - # create the action object, and add it to the parser - action_class = self._pop_action_class(kwargs) - if not _callable(action_class): - raise ValueError('unknown action "%s"' % (action_class,)) - action = action_class(**kwargs) - - # raise an error if the action type is not callable - type_func = self._registry_get('type', action.type, action.type) - if not _callable(type_func): - raise ValueError('%r is not callable' % (type_func,)) - - # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): - try: - self._get_formatter()._format_args(action, None) - except TypeError: - raise ValueError("length of metavar tuple does not match nargs") - - return self._add_action(action) - - def add_argument_group(self, *args, **kwargs): - group = _ArgumentGroup(self, *args, **kwargs) - self._action_groups.append(group) - return group - - def add_mutually_exclusive_group(self, **kwargs): - group = _MutuallyExclusiveGroup(self, **kwargs) - self._mutually_exclusive_groups.append(group) - return group - - def _add_action(self, action): - # resolve any conflicts - self._check_conflict(action) - - # add to actions list - self._actions.append(action) - action.container = self - - 
# index the action by any option strings it has - for option_string in action.option_strings: - self._option_string_actions[option_string] = action - - # set the flag if any option strings look like negative numbers - for option_string in action.option_strings: - if self._negative_number_matcher.match(option_string): - if not self._has_negative_number_optionals: - self._has_negative_number_optionals.append(True) - - # return the created action - return action - - def _remove_action(self, action): - self._actions.remove(action) - - def _add_container_actions(self, container): - # collect groups by titles - title_group_map = {} - for group in self._action_groups: - if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') - raise ValueError(msg % (group.title)) - title_group_map[group.title] = group - - # map each action to its group - group_map = {} - for group in container._action_groups: - - # if a group with the title exists, use that, otherwise - # create a new group matching the container's group - if group.title not in title_group_map: - title_group_map[group.title] = self.add_argument_group( - title=group.title, - description=group.description, - conflict_handler=group.conflict_handler) - - # map the actions to their new group - for action in group._group_actions: - group_map[action] = title_group_map[group.title] - - # add container's mutually exclusive groups - # NOTE: if add_mutually_exclusive_group ever gains title= and - # description= then this code will need to be expanded as above - for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( - required=group.required) - - # map the actions to their new mutex group - for action in group._group_actions: - group_map[action] = mutex_group - - # add all actions to this container or their group - for action in container._actions: - group_map.get(action, self)._add_action(action) - - def _get_positional_kwargs(self, dest, 
**kwargs): - # make sure required is not specified - if 'required' in kwargs: - msg = _("'required' is an invalid argument for positionals") - raise TypeError(msg) - - # mark positional arguments as required if at least one is - # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: - kwargs['required'] = True - - # return the keyword arguments with no option strings - return dict(kwargs, dest=dest, option_strings=[]) - - def _get_optional_kwargs(self, *args, **kwargs): - # determine short and long option strings - option_strings = [] - long_option_strings = [] - for option_string in args: - # error on strings that don't start with an appropriate prefix - if not option_string[0] in self.prefix_chars: - msg = _('invalid option string %r: ' - 'must start with a character %r') - tup = option_string, self.prefix_chars - raise ValueError(msg % tup) - - # strings starting with two prefix characters are long options - option_strings.append(option_string) - if option_string[0] in self.prefix_chars: - if len(option_string) > 1: - if option_string[1] in self.prefix_chars: - long_option_strings.append(option_string) - - # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' - dest = kwargs.pop('dest', None) - if dest is None: - if long_option_strings: - dest_option_string = long_option_strings[0] - else: - dest_option_string = option_strings[0] - dest = dest_option_string.lstrip(self.prefix_chars) - if not dest: - msg = _('dest= is required for options like %r') - raise ValueError(msg % option_string) - dest = dest.replace('-', '_') - - # return the updated keyword arguments - return dict(kwargs, dest=dest, option_strings=option_strings) - - def _pop_action_class(self, kwargs, default=None): - action = kwargs.pop('action', default) - return self._registry_get('action', action, action) - - def _get_handler(self): - # determine function from conflict 
handler string - handler_func_name = '_handle_conflict_%s' % self.conflict_handler - try: - return getattr(self, handler_func_name) - except AttributeError: - msg = _('invalid conflict_resolution value: %r') - raise ValueError(msg % self.conflict_handler) - - def _check_conflict(self, action): - - # find all options that conflict with this option - confl_optionals = [] - for option_string in action.option_strings: - if option_string in self._option_string_actions: - confl_optional = self._option_string_actions[option_string] - confl_optionals.append((option_string, confl_optional)) - - # resolve any conflicts - if confl_optionals: - conflict_handler = self._get_handler() - conflict_handler(action, confl_optionals) - - def _handle_conflict_error(self, action, conflicting_actions): - message = _('conflicting option string(s): %s') - conflict_string = ', '.join([option_string - for option_string, action - in conflicting_actions]) - raise ArgumentError(action, message % conflict_string) - - def _handle_conflict_resolve(self, action, conflicting_actions): - - # remove all conflicting options - for option_string, action in conflicting_actions: - - # remove the conflicting option - action.option_strings.remove(option_string) - self._option_string_actions.pop(option_string, None) - - # if the option now has no option string, remove it from the - # container holding it - if not action.option_strings: - action.container._remove_action(action) - - -class _ArgumentGroup(_ActionsContainer): - - def __init__(self, container, title=None, description=None, **kwargs): - # add any missing keyword arguments by checking the container - update = kwargs.setdefault - update('conflict_handler', container.conflict_handler) - update('prefix_chars', container.prefix_chars) - update('argument_default', container.argument_default) - super_init = super(_ArgumentGroup, self).__init__ - super_init(description=description, **kwargs) - - # group attributes - self.title = title - self._group_actions 
= [] - - # share most attributes with the container - self._registries = container._registries - self._actions = container._actions - self._option_string_actions = container._option_string_actions - self._defaults = container._defaults - self._has_negative_number_optionals = \ - container._has_negative_number_optionals - self._mutually_exclusive_groups = container._mutually_exclusive_groups - - def _add_action(self, action): - action = super(_ArgumentGroup, self)._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - super(_ArgumentGroup, self)._remove_action(action) - self._group_actions.remove(action) - - -class _MutuallyExclusiveGroup(_ArgumentGroup): - - def __init__(self, container, required=False): - super(_MutuallyExclusiveGroup, self).__init__(container) - self.required = required - self._container = container - - def _add_action(self, action): - if action.required: - msg = _('mutually exclusive arguments must be optional') - raise ValueError(msg) - action = self._container._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - self._container._remove_action(action) - self._group_actions.remove(action) - - -class ArgumentParser(_AttributeHolder, _ActionsContainer): - """Object for parsing command line strings into Python objects. 
- - Keyword Arguments: - - prog -- The name of the program (default: sys.argv[0]) - - usage -- A usage message (default: auto-generated from arguments) - - description -- A description of what the program does - - epilog -- Text following the argument descriptions - - parents -- Parsers whose arguments should be copied into this one - - formatter_class -- HelpFormatter class for printing help messages - - prefix_chars -- Characters that prefix optional arguments - - fromfile_prefix_chars -- Characters that prefix files containing - additional arguments - - argument_default -- The default value for all arguments - - conflict_handler -- String indicating how to handle conflicts - - add_help -- Add a -h/-help option - """ - - def __init__(self, - prog=None, - usage=None, - description=None, - epilog=None, - version=None, - parents=[], - formatter_class=HelpFormatter, - prefix_chars='-', - fromfile_prefix_chars=None, - argument_default=None, - conflict_handler='error', - add_help=True): - - if version is not None: - import warnings - warnings.warn( - """The "version" argument to ArgumentParser is deprecated. 
""" - """Please use """ - """"add_argument(..., action='version', version="N", ...)" """ - """instead""", DeprecationWarning) - - superinit = super(ArgumentParser, self).__init__ - superinit(description=description, - prefix_chars=prefix_chars, - argument_default=argument_default, - conflict_handler=conflict_handler) - - # default setting for prog - if prog is None: - prog = _os.path.basename(_sys.argv[0]) - - self.prog = prog - self.usage = usage - self.epilog = epilog - self.version = version - self.formatter_class = formatter_class - self.fromfile_prefix_chars = fromfile_prefix_chars - self.add_help = add_help - - add_group = self.add_argument_group - self._positionals = add_group(_('positional arguments')) - self._optionals = add_group(_('optional arguments')) - self._subparsers = None - - # register types - def identity(string): - return string - self.register('type', None, identity) - - # add help and version arguments if necessary - # (using explicit default to override global argument_default) - default_prefix = '-' if '-' in prefix_chars else prefix_chars[0] - if self.add_help: - self.add_argument( - default_prefix+'h', default_prefix*2+'help', - action='help', default=SUPPRESS, - help=_('show this help message and exit')) - if self.version: - self.add_argument( - default_prefix+'v', default_prefix*2+'version', - action='version', default=SUPPRESS, - version=self.version, - help=_("show program's version number and exit")) - - # add parent arguments and defaults - for parent in parents: - self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) - - # ======================= - # Pretty __repr__ methods - # ======================= - def _get_kwargs(self): - names = [ - 'prog', - 'usage', - 'description', - 'version', - 'formatter_class', - 'conflict_handler', - 'add_help', - ] - return [(name, getattr(self, name)) for name in names] - - # 
================================== - # Optional/Positional adding methods - # ================================== - def add_subparsers(self, **kwargs): - if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) - - # add the parser class to the arguments if it's not present - kwargs.setdefault('parser_class', type(self)) - - if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) - self._subparsers = self.add_argument_group(title, description) - else: - self._subparsers = self._positionals - - # prog defaults to the usage message of this parser, skipping - # optional arguments and with no "usage:" prefix - if kwargs.get('prog') is None: - formatter = self._get_formatter() - positionals = self._get_positional_actions() - groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') - kwargs['prog'] = formatter.format_help().strip() - - # create the parsers action and add it to the positionals list - parsers_class = self._pop_action_class(kwargs, 'parsers') - action = parsers_class(option_strings=[], **kwargs) - self._subparsers._add_action(action) - - # return the created parsers action - return action - - def _add_action(self, action): - if action.option_strings: - self._optionals._add_action(action) - else: - self._positionals._add_action(action) - return action - - def _get_optional_actions(self): - return [action - for action in self._actions - if action.option_strings] - - def _get_positional_actions(self): - return [action - for action in self._actions - if not action.option_strings] - - # ===================================== - # Command line argument parsing methods - # ===================================== - def parse_args(self, args=None, namespace=None): - args, argv = self.parse_known_args(args, namespace) - if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) - 
return args - - def parse_known_args(self, args=None, namespace=None): - if args is None: - # args default to the system args - args = _sys.argv[1:] - else: - # make sure that args are mutable - args = list(args) - - # default Namespace built from parser defaults - if namespace is None: - namespace = Namespace() - - # add any action defaults that aren't present - for action in self._actions: - if action.dest is not SUPPRESS: - if not hasattr(namespace, action.dest): - if action.default is not SUPPRESS: - setattr(namespace, action.dest, action.default) - - # add any parser defaults that aren't present - for dest in self._defaults: - if not hasattr(namespace, dest): - setattr(namespace, dest, self._defaults[dest]) - - # parse the arguments and exit if there are any errors - try: - namespace, args = self._parse_known_args(args, namespace) - if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): - args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) - delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) - return namespace, args - except ArgumentError: - err = _sys.exc_info()[1] - self.error(str(err)) - - def _parse_known_args(self, arg_strings, namespace): - # replace arg strings that are file references - if self.fromfile_prefix_chars is not None: - arg_strings = self._read_args_from_files(arg_strings) - - # map all mutually exclusive arguments to the other arguments - # they can't occur with - action_conflicts = {} - for mutex_group in self._mutually_exclusive_groups: - group_actions = mutex_group._group_actions - for i, mutex_action in enumerate(mutex_group._group_actions): - conflicts = action_conflicts.setdefault(mutex_action, []) - conflicts.extend(group_actions[:i]) - conflicts.extend(group_actions[i + 1:]) - - # find all option indices, and determine the arg_string_pattern - # which has an 'O' if there is an option at an index, - # an 'A' if there is an argument, or a '-' if there is a '--' - option_string_indices = {} - arg_string_pattern_parts = [] - arg_strings_iter = 
iter(arg_strings) - for i, arg_string in enumerate(arg_strings_iter): - - # all args after -- are non-options - if arg_string == '--': - arg_string_pattern_parts.append('-') - for arg_string in arg_strings_iter: - arg_string_pattern_parts.append('A') - - # otherwise, add the arg to the arg strings - # and note the index if it was an option - else: - option_tuple = self._parse_optional(arg_string) - if option_tuple is None: - pattern = 'A' - else: - option_string_indices[i] = option_tuple - pattern = 'O' - arg_string_pattern_parts.append(pattern) - - # join the pieces together to form the pattern - arg_strings_pattern = ''.join(arg_string_pattern_parts) - - # converts arg strings to the appropriate and then takes the action - seen_actions = set() - seen_non_default_actions = set() - - def take_action(action, argument_strings, option_string=None): - seen_actions.add(action) - argument_values = self._get_values(action, argument_strings) - - # error if this argument is not allowed with other previously - # seen arguments, assuming that actions that use the default - # value don't really count as "present" - if argument_values is not action.default: - seen_non_default_actions.add(action) - for conflict_action in action_conflicts.get(action, []): - if conflict_action in seen_non_default_actions: - msg = _('not allowed with argument %s') - action_name = _get_action_name(conflict_action) - raise ArgumentError(action, msg % action_name) - - # take the action if we didn't receive a SUPPRESS value - # (e.g. from a default) - if argument_values is not SUPPRESS: - action(self, namespace, argument_values, option_string) - - # function to convert arg_strings into an optional action - def consume_optional(start_index): - - # get the optional identified at this index - option_tuple = option_string_indices[start_index] - action, option_string, explicit_arg = option_tuple - - # identify additional optionals in the same arg string - # (e.g. 
-xyz is the same as -x -y -z if no args are required) - match_argument = self._match_argument - action_tuples = [] - while True: - - # if we found no optional action, skip it - if action is None: - extras.append(arg_strings[start_index]) - return start_index + 1 - - # if there is an explicit argument, try to match the - # optional's string arguments to only this - if explicit_arg is not None: - arg_count = match_argument(action, 'A') - - # if the action is a single-dash option and takes no - # arguments, try to parse more single-dash options out - # of the tail of the option string - chars = self.prefix_chars - if arg_count == 0 and option_string[1] not in chars: - action_tuples.append((action, [], option_string)) - char = option_string[0] - option_string = char + explicit_arg[0] - new_explicit_arg = explicit_arg[1:] or None - optionals_map = self._option_string_actions - if option_string in optionals_map: - action = optionals_map[option_string] - explicit_arg = new_explicit_arg - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if the action expect exactly one argument, we've - # successfully matched the option; exit the loop - elif arg_count == 1: - stop = start_index + 1 - args = [explicit_arg] - action_tuples.append((action, args, option_string)) - break - - # error if a double-dash option did not use the - # explicit argument - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if there is no explicit argument, try to match the - # optional's string arguments with the following strings - # if successful, exit the loop - else: - start = start_index + 1 - selected_patterns = arg_strings_pattern[start:] - arg_count = match_argument(action, selected_patterns) - stop = start + arg_count - args = arg_strings[start:stop] - action_tuples.append((action, args, option_string)) - break - - # add the Optional to the list and return the index at which - # the 
Optional's string args stopped - assert action_tuples - for action, args, option_string in action_tuples: - take_action(action, args, option_string) - return stop - - # the list of Positionals left to be parsed; this is modified - # by consume_positionals() - positionals = self._get_positional_actions() - - # function to convert arg_strings into positional actions - def consume_positionals(start_index): - # match as many Positionals as possible - match_partial = self._match_arguments_partial - selected_pattern = arg_strings_pattern[start_index:] - arg_counts = match_partial(positionals, selected_pattern) - - # slice off the appropriate arg strings for each Positional - # and add the Positional and its args to the list - for action, arg_count in zip(positionals, arg_counts): - args = arg_strings[start_index: start_index + arg_count] - start_index += arg_count - take_action(action, args) - - # slice off the Positionals that we just parsed and return the - # index at which the Positionals' string args stopped - positionals[:] = positionals[len(arg_counts):] - return start_index - - # consume Positionals and Optionals alternately, until we have - # passed the last option string - extras = [] - start_index = 0 - if option_string_indices: - max_option_string_index = max(option_string_indices) - else: - max_option_string_index = -1 - while start_index <= max_option_string_index: - - # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in option_string_indices - if index >= start_index]) - if start_index != next_option_string_index: - positionals_end_index = consume_positionals(start_index) - - # only try to parse the next optional if we didn't consume - # the option string during the positionals parsing - if positionals_end_index > start_index: - start_index = positionals_end_index - continue - else: - start_index = positionals_end_index - - # if we consumed all the positionals we could and we're not - # at the index 
of an option string, there were extra arguments - if start_index not in option_string_indices: - strings = arg_strings[start_index:next_option_string_index] - extras.extend(strings) - start_index = next_option_string_index - - # consume the next optional and any arguments for it - start_index = consume_optional(start_index) - - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) - - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) - - # if we didn't use all the Positional objects, there were too few - # arg strings supplied. - if positionals: - self.error(_('too few arguments')) - - # make sure all required actions were present, and convert defaults. - for action in self._actions: - if action not in seen_actions: - if action.required: - name = _get_action_name(action) - self.error(_('argument %s is required') % name) - else: - # Convert action default now instead of doing it before - # parsing arguments to avoid calling convert functions - # twice (which may fail) if the argument was given, but - # only if it was defined already in the namespace - if (action.default is not None and - isinstance(action.default, basestring) and - hasattr(namespace, action.dest) and - action.default is getattr(namespace, action.dest)): - setattr(namespace, action.dest, - self._get_value(action, action.default)) - - # make sure all required groups had one option present - for group in self._mutually_exclusive_groups: - if group.required: - for action in group._group_actions: - if action in seen_non_default_actions: - break - - # if no actions were used, report the error - else: - names = [_get_action_name(action) - for action in group._group_actions - if action.help is not SUPPRESS] - msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) - - # return the updated namespace and the extra arguments - return namespace, extras - - def 
_read_args_from_files(self, arg_strings): - # expand arguments referencing files - new_arg_strings = [] - for arg_string in arg_strings: - - # for regular arguments, just add them back into the list - if not arg_string or arg_string[0] not in self.fromfile_prefix_chars: - new_arg_strings.append(arg_string) - - # replace arguments referencing files with the file content - else: - try: - args_file = open(arg_string[1:]) - try: - arg_strings = [] - for arg_line in args_file.read().splitlines(): - for arg in self.convert_arg_line_to_args(arg_line): - arg_strings.append(arg) - arg_strings = self._read_args_from_files(arg_strings) - new_arg_strings.extend(arg_strings) - finally: - args_file.close() - except IOError: - err = _sys.exc_info()[1] - self.error(str(err)) - - # return the modified argument list - return new_arg_strings - - def convert_arg_line_to_args(self, arg_line): - return [arg_line] - - def _match_argument(self, action, arg_strings_pattern): - # match the pattern for this action to the arg strings - nargs_pattern = self._get_nargs_pattern(action) - match = _re.match(nargs_pattern, arg_strings_pattern) - - # raise an exception if we weren't able to find a match - if match is None: - nargs_errors = { - None: _('expected one argument'), - OPTIONAL: _('expected at most one argument'), - ONE_OR_MORE: _('expected at least one argument'), - } - default = _('expected %s argument(s)') % action.nargs - msg = nargs_errors.get(action.nargs, default) - raise ArgumentError(action, msg) - - # return the number of arguments matched - return len(match.group(1)) - - def _match_arguments_partial(self, actions, arg_strings_pattern): - # progressively shorten the actions list by slicing off the - # final actions until we find a match - result = [] - for i in range(len(actions), 0, -1): - actions_slice = actions[:i] - pattern = ''.join([self._get_nargs_pattern(action) - for action in actions_slice]) - match = _re.match(pattern, arg_strings_pattern) - if match is not None: - 
result.extend([len(string) for string in match.groups()]) - break - - # return the list of arg string counts - return result - - def _parse_optional(self, arg_string): - # if it's an empty string, it was meant to be a positional - if not arg_string: - return None - - # if it doesn't start with a prefix, it was meant to be positional - if not arg_string[0] in self.prefix_chars: - return None - - # if the option string is present in the parser, return the action - if arg_string in self._option_string_actions: - action = self._option_string_actions[arg_string] - return action, arg_string, None - - # if it's just a single character, it was meant to be positional - if len(arg_string) == 1: - return None - - # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) - if option_string in self._option_string_actions: - action = self._option_string_actions[option_string] - return action, option_string, explicit_arg - - # search through all possible prefixes of the option string - # and all actions in the parser for possible interpretations - option_tuples = self._get_option_tuples(arg_string) - - # if multiple actions match, the option string was ambiguous - if len(option_tuples) > 1: - options = ', '.join([option_string - for action, option_string, explicit_arg in option_tuples]) - tup = arg_string, options - self.error(_('ambiguous option: %s could match %s') % tup) - - # if exactly one action matched, this segmentation is good, - # so return the parsed action - elif len(option_tuples) == 1: - option_tuple, = option_tuples - return option_tuple - - # if it was not found as an option, but it looks like a negative - # number, it was meant to be positional - # unless there are negative-number-like options - if self._negative_number_matcher.match(arg_string): - if not self._has_negative_number_optionals: - return None - - # if it contains a space, it was meant to be a positional - if ' 
' in arg_string: - return None - - # it was meant to be an optional but there is no such option - # in this parser (though it might be a valid option in a subparser) - return None, arg_string, None - - def _get_option_tuples(self, option_string): - result = [] - - # option strings starting with two prefix characters are only - # split at the '=' - chars = self.prefix_chars - if option_string[0] in chars and option_string[1] in chars: - if '=' in option_string: - option_prefix, explicit_arg = option_string.split('=', 1) - else: - option_prefix = option_string - explicit_arg = None - for option_string in self._option_string_actions: - if option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # single character options can be concatenated with their arguments - # but multiple character options always have to have their argument - # separate - elif option_string[0] in chars and option_string[1] not in chars: - option_prefix = option_string - explicit_arg = None - short_option_prefix = option_string[:2] - short_explicit_arg = option_string[2:] - - for option_string in self._option_string_actions: - if option_string == short_option_prefix: - action = self._option_string_actions[option_string] - tup = action, option_string, short_explicit_arg - result.append(tup) - elif option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # shouldn't ever get here - else: - self.error(_('unexpected option string: %s') % option_string) - - # return the collected option tuples - return result - - def _get_nargs_pattern(self, action): - # in all examples below, we have to allow for '--' args - # which are represented as '-' in the pattern - nargs = action.nargs - - # the default (None) is assumed to be a single argument - if nargs is None: - nargs_pattern = '(-*A-*)' - - # 
allow zero or one arguments - elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' - - # allow zero or more arguments - elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' - - # allow one or more arguments - elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' - - # allow any number of options or arguments - elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' - - # allow one argument followed by any number of options or arguments - elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' - - # all others should be integers - else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) - - # if this is an optional action, -- is not allowed - if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') - - # return the pattern - return nargs_pattern - - # ======================== - # Value conversion methods - # ======================== - def _get_values(self, action, arg_strings): - # for everything but PARSER, REMAINDER args, strip out first '--' - if action.nargs not in [PARSER, REMAINDER]: - try: - arg_strings.remove('--') - except ValueError: - pass - - # optional argument produces a default when not present - if not arg_strings and action.nargs == OPTIONAL: - if action.option_strings: - value = action.const - else: - value = action.default - if isinstance(value, basestring): - value = self._get_value(action, value) - self._check_value(action, value) - - # when nargs='*' on a positional, if there were no command-line - # args, use the default if it is anything other than None - elif (not arg_strings and action.nargs == ZERO_OR_MORE and - not action.option_strings): - if action.default is not None: - value = action.default - else: - value = arg_strings - self._check_value(action, value) - - # single argument or optional argument produces a single value - elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: - arg_string, = arg_strings - value = self._get_value(action, arg_string) 
- self._check_value(action, value) - - # REMAINDER arguments convert all values, checking none - elif action.nargs == REMAINDER: - value = [self._get_value(action, v) for v in arg_strings] - - # PARSER arguments convert all values, but check only the first - elif action.nargs == PARSER: - value = [self._get_value(action, v) for v in arg_strings] - self._check_value(action, value[0]) - - # all other types of nargs produce a list - else: - value = [self._get_value(action, v) for v in arg_strings] - for v in value: - self._check_value(action, v) - - # return the converted value - return value - - def _get_value(self, action, arg_string): - type_func = self._registry_get('type', action.type, action.type) - if not _callable(type_func): - msg = _('%r is not callable') - raise ArgumentError(action, msg % type_func) - - # convert the value to the appropriate type - try: - result = type_func(arg_string) - - # ArgumentTypeErrors indicate errors - except ArgumentTypeError: - name = getattr(action.type, '__name__', repr(action.type)) - msg = str(_sys.exc_info()[1]) - raise ArgumentError(action, msg) - - # TypeErrors or ValueErrors also indicate errors - except (TypeError, ValueError): - name = getattr(action.type, '__name__', repr(action.type)) - msg = _('invalid %s value: %r') - raise ArgumentError(action, msg % (name, arg_string)) - - # return the converted value - return result - - def _check_value(self, action, value): - # converted value must be one of the choices (if specified) - if action.choices is not None and value not in action.choices: - tup = value, ', '.join(map(repr, action.choices)) - msg = _('invalid choice: %r (choose from %s)') % tup - raise ArgumentError(action, msg) - - # ======================= - # Help-formatting methods - # ======================= - def format_usage(self): - formatter = self._get_formatter() - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - return formatter.format_help() - - def format_help(self): - 
formatter = self._get_formatter() - - # usage - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - - # description - formatter.add_text(self.description) - - # positionals, optionals and user-defined groups - for action_group in self._action_groups: - formatter.start_section(action_group.title) - formatter.add_text(action_group.description) - formatter.add_arguments(action_group._group_actions) - formatter.end_section() - - # epilog - formatter.add_text(self.epilog) - - # determine help from format above - return formatter.format_help() - - def format_version(self): - import warnings - warnings.warn( - 'The format_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) - formatter = self._get_formatter() - formatter.add_text(self.version) - return formatter.format_help() - - def _get_formatter(self): - return self.formatter_class(prog=self.prog) - - # ===================== - # Help-printing methods - # ===================== - def print_usage(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_usage(), file) - - def print_help(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_help(), file) - - def print_version(self, file=None): - import warnings - warnings.warn( - 'The print_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) - self._print_message(self.format_version(), file) - - def _print_message(self, message, file=None): - if message: - if file is None: - file = _sys.stderr - file.write(message) - - # =============== - # Exiting methods - # =============== - def exit(self, status=0, message=None): - if message: - self._print_message(message, _sys.stderr) - _sys.exit(status) - - def error(self, message): - """error(message: string) - - Prints a usage message incorporating the message to stderr and - exits. 
- - If you override this in a subclass, it should not return -- it - should either exit or raise an exception. - """ - self.print_usage(_sys.stderr) - self.exit(2, _('%s: error: %s\n') % (self.prog, message)) diff --git a/python/Lib/ast.py b/python/Lib/ast.py deleted file mode 100755 index fd5dfdba67..0000000000 --- a/python/Lib/ast.py +++ /dev/null @@ -1,311 +0,0 @@ -# -*- coding: utf-8 -*- -""" - ast - ~~~ - - The `ast` module helps Python applications to process trees of the Python - abstract syntax grammar. The abstract syntax itself might change with - each Python release; this module helps to find out programmatically what - the current grammar looks like and allows modifications of it. - - An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as - a flag to the `compile()` builtin function or by using the `parse()` - function from this module. The result will be a tree of objects whose - classes all inherit from `ast.AST`. - - A modified abstract syntax tree can be compiled into a Python code object - using the built-in `compile()` function. - - Additionally various helper functions are provided that make working with - the trees simpler. The main intention of the helper functions and this - module in general is to provide an easy to use interface for libraries - that work tightly with the python syntax (template engines for example). - - - :copyright: Copyright 2008 by Armin Ronacher. - :license: Python License. -""" -from _ast import * -from _ast import __version__ - - -def parse(source, filename='', mode='exec'): - """ - Parse the source into an AST node. - Equivalent to compile(source, filename, mode, PyCF_ONLY_AST). - """ - return compile(source, filename, mode, PyCF_ONLY_AST) - - -def literal_eval(node_or_string): - """ - Safely evaluate an expression node or a string containing a Python - expression. 
The string or node provided may only consist of the following - Python literal structures: strings, numbers, tuples, lists, dicts, booleans, - and None. - """ - _safe_names = {'None': None, 'True': True, 'False': False} - if isinstance(node_or_string, basestring): - node_or_string = parse(node_or_string, mode='eval') - if isinstance(node_or_string, Expression): - node_or_string = node_or_string.body - def _convert(node): - if isinstance(node, Str): - return node.s - elif isinstance(node, Num): - return node.n - elif isinstance(node, Tuple): - return tuple(map(_convert, node.elts)) - elif isinstance(node, List): - return list(map(_convert, node.elts)) - elif isinstance(node, Dict): - return dict((_convert(k), _convert(v)) for k, v - in zip(node.keys, node.values)) - elif isinstance(node, Name): - if node.id in _safe_names: - return _safe_names[node.id] - elif isinstance(node, BinOp) and \ - isinstance(node.op, (Add, Sub)) and \ - isinstance(node.right, Num) and \ - isinstance(node.right.n, complex) and \ - isinstance(node.left, Num) and \ - isinstance(node.left.n, (int, long, float)): - left = node.left.n - right = node.right.n - if isinstance(node.op, Add): - return left + right - else: - return left - right - raise ValueError('malformed string') - return _convert(node_or_string) - - -def dump(node, annotate_fields=True, include_attributes=False): - """ - Return a formatted dump of the tree in *node*. This is mainly useful for - debugging purposes. The returned string will show the names and the values - for fields. This makes the code impossible to evaluate, so if evaluation is - wanted *annotate_fields* must be set to False. Attributes such as line - numbers and column offsets are not dumped by default. If this is wanted, - *include_attributes* can be set to True. 
- """ - def _format(node): - if isinstance(node, AST): - fields = [(a, _format(b)) for a, b in iter_fields(node)] - rv = '%s(%s' % (node.__class__.__name__, ', '.join( - ('%s=%s' % field for field in fields) - if annotate_fields else - (b for a, b in fields) - )) - if include_attributes and node._attributes: - rv += fields and ', ' or ' ' - rv += ', '.join('%s=%s' % (a, _format(getattr(node, a))) - for a in node._attributes) - return rv + ')' - elif isinstance(node, list): - return '[%s]' % ', '.join(_format(x) for x in node) - return repr(node) - if not isinstance(node, AST): - raise TypeError('expected AST, got %r' % node.__class__.__name__) - return _format(node) - - -def copy_location(new_node, old_node): - """ - Copy source location (`lineno` and `col_offset` attributes) from - *old_node* to *new_node* if possible, and return *new_node*. - """ - for attr in 'lineno', 'col_offset': - if attr in old_node._attributes and attr in new_node._attributes \ - and hasattr(old_node, attr): - setattr(new_node, attr, getattr(old_node, attr)) - return new_node - - -def fix_missing_locations(node): - """ - When you compile a node tree with compile(), the compiler expects lineno and - col_offset attributes for every node that supports them. This is rather - tedious to fill in for generated nodes, so this helper adds these attributes - recursively where not already set, by setting them to the values of the - parent node. It works recursively starting at *node*. 
- """ - def _fix(node, lineno, col_offset): - if 'lineno' in node._attributes: - if not hasattr(node, 'lineno'): - node.lineno = lineno - else: - lineno = node.lineno - if 'col_offset' in node._attributes: - if not hasattr(node, 'col_offset'): - node.col_offset = col_offset - else: - col_offset = node.col_offset - for child in iter_child_nodes(node): - _fix(child, lineno, col_offset) - _fix(node, 1, 0) - return node - - -def increment_lineno(node, n=1): - """ - Increment the line number of each node in the tree starting at *node* by *n*. - This is useful to "move code" to a different location in a file. - """ - for child in walk(node): - if 'lineno' in child._attributes: - child.lineno = getattr(child, 'lineno', 0) + n - return node - - -def iter_fields(node): - """ - Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields`` - that is present on *node*. - """ - for field in node._fields: - try: - yield field, getattr(node, field) - except AttributeError: - pass - - -def iter_child_nodes(node): - """ - Yield all direct child nodes of *node*, that is, all fields that are nodes - and all items of fields that are lists of nodes. - """ - for name, field in iter_fields(node): - if isinstance(field, AST): - yield field - elif isinstance(field, list): - for item in field: - if isinstance(item, AST): - yield item - - -def get_docstring(node, clean=True): - """ - Return the docstring for the given node or None if no docstring can - be found. If the node provided does not have docstrings a TypeError - will be raised. 
- """ - if not isinstance(node, (FunctionDef, ClassDef, Module)): - raise TypeError("%r can't have docstrings" % node.__class__.__name__) - if node.body and isinstance(node.body[0], Expr) and \ - isinstance(node.body[0].value, Str): - if clean: - import inspect - return inspect.cleandoc(node.body[0].value.s) - return node.body[0].value.s - - -def walk(node): - """ - Recursively yield all descendant nodes in the tree starting at *node* - (including *node* itself), in no specified order. This is useful if you - only want to modify nodes in place and don't care about the context. - """ - from collections import deque - todo = deque([node]) - while todo: - node = todo.popleft() - todo.extend(iter_child_nodes(node)) - yield node - - -class NodeVisitor(object): - """ - A node visitor base class that walks the abstract syntax tree and calls a - visitor function for every node found. This function may return a value - which is forwarded by the `visit` method. - - This class is meant to be subclassed, with the subclass adding visitor - methods. - - Per default the visitor functions for the nodes are ``'visit_'`` + - class name of the node. So a `TryFinally` node visit function would - be `visit_TryFinally`. This behavior can be changed by overriding - the `visit` method. If no visitor function exists for a node - (return value `None`) the `generic_visit` visitor is used instead. - - Don't use the `NodeVisitor` if you want to apply changes to nodes during - traversing. For this a special visitor exists (`NodeTransformer`) that - allows modifications. 
- """ - - def visit(self, node): - """Visit a node.""" - method = 'visit_' + node.__class__.__name__ - visitor = getattr(self, method, self.generic_visit) - return visitor(node) - - def generic_visit(self, node): - """Called if no explicit visitor function exists for a node.""" - for field, value in iter_fields(node): - if isinstance(value, list): - for item in value: - if isinstance(item, AST): - self.visit(item) - elif isinstance(value, AST): - self.visit(value) - - -class NodeTransformer(NodeVisitor): - """ - A :class:`NodeVisitor` subclass that walks the abstract syntax tree and - allows modification of nodes. - - The `NodeTransformer` will walk the AST and use the return value of the - visitor methods to replace or remove the old node. If the return value of - the visitor method is ``None``, the node will be removed from its location, - otherwise it is replaced with the return value. The return value may be the - original node in which case no replacement takes place. - - Here is an example transformer that rewrites all occurrences of name lookups - (``foo``) to ``data['foo']``:: - - class RewriteName(NodeTransformer): - - def visit_Name(self, node): - return copy_location(Subscript( - value=Name(id='data', ctx=Load()), - slice=Index(value=Str(s=node.id)), - ctx=node.ctx - ), node) - - Keep in mind that if the node you're operating on has child nodes you must - either transform the child nodes yourself or call the :meth:`generic_visit` - method for the node first. - - For nodes that were part of a collection of statements (that applies to all - statement nodes), the visitor may also return a list of nodes rather than - just a single node. 
- - Usually you use the transformer like this:: - - node = YourTransformer().visit(node) - """ - - def generic_visit(self, node): - for field, old_value in iter_fields(node): - old_value = getattr(node, field, None) - if isinstance(old_value, list): - new_values = [] - for value in old_value: - if isinstance(value, AST): - value = self.visit(value) - if value is None: - continue - elif not isinstance(value, AST): - new_values.extend(value) - continue - new_values.append(value) - old_value[:] = new_values - elif isinstance(old_value, AST): - new_node = self.visit(old_value) - if new_node is None: - delattr(node, field) - else: - setattr(node, field, new_node) - return node diff --git a/python/Lib/asynchat.py b/python/Lib/asynchat.py deleted file mode 100755 index 57459a0821..0000000000 --- a/python/Lib/asynchat.py +++ /dev/null @@ -1,321 +0,0 @@ -# -*- Mode: Python; tab-width: 4 -*- -# Id: asynchat.py,v 2.26 2000/09/07 22:29:26 rushing Exp -# Author: Sam Rushing - -# ====================================================================== -# Copyright 1996 by Sam Rushing -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of Sam -# Rushing not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. 
-# -# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN -# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# ====================================================================== - -r"""A class supporting chat-style (command/response) protocols. - -This class adds support for 'chat' style protocols - where one side -sends a 'command', and the other sends a response (examples would be -the common internet protocols - smtp, nntp, ftp, etc..). - -The handle_read() method looks at the input stream for the current -'terminator' (usually '\r\n' for single-line responses, '\r\n.\r\n' -for multi-line output), calling self.found_terminator() on its -receipt. - -for example: -Say you build an async nntp client using this class. At the start -of the connection, you'll have self.terminator set to '\r\n', in -order to process the single-line greeting. Just before issuing a -'LIST' command you'll set it to '\r\n.\r\n'. The output of the LIST -command will be accumulated (using your own 'collect_incoming_data' -method) up to the terminator, and then control will be returned to -you - by calling your self.found_terminator() method. -""" - -import asyncore -import errno -import socket -from collections import deque -from sys import py3kwarning -from warnings import filterwarnings, catch_warnings - -_BLOCKING_IO_ERRORS = (errno.EAGAIN, errno.EALREADY, errno.EINPROGRESS, - errno.EWOULDBLOCK) - - -class async_chat (asyncore.dispatcher): - """This is an abstract class. 
You must derive from this class, and add - the two methods collect_incoming_data() and found_terminator()""" - - # these are overridable defaults - - ac_in_buffer_size = 4096 - ac_out_buffer_size = 4096 - - def __init__ (self, sock=None, map=None): - # for string terminator matching - self.ac_in_buffer = '' - - # we use a list here rather than cStringIO for a few reasons... - # del lst[:] is faster than sio.truncate(0) - # lst = [] is faster than sio.truncate(0) - # cStringIO will be gaining unicode support in py3k, which - # will negatively affect the performance of bytes compared to - # a ''.join() equivalent - self.incoming = [] - - # we toss the use of the "simple producer" and replace it with - # a pure deque, which the original fifo was a wrapping of - self.producer_fifo = deque() - asyncore.dispatcher.__init__ (self, sock, map) - - def collect_incoming_data(self, data): - raise NotImplementedError("must be implemented in subclass") - - def _collect_incoming_data(self, data): - self.incoming.append(data) - - def _get_data(self): - d = ''.join(self.incoming) - del self.incoming[:] - return d - - def found_terminator(self): - raise NotImplementedError("must be implemented in subclass") - - def set_terminator (self, term): - "Set the input delimiter. Can be a fixed string of any length, an integer, or None" - self.terminator = term - - def get_terminator (self): - return self.terminator - - # grab some more data from the socket, - # throw it to the collector method, - # check for the terminator, - # if found, transition to the next state. - - def handle_read (self): - - try: - data = self.recv (self.ac_in_buffer_size) - except socket.error, why: - if why.args[0] in _BLOCKING_IO_ERRORS: - return - self.handle_error() - return - - self.ac_in_buffer = self.ac_in_buffer + data - - # Continue to search for self.terminator in self.ac_in_buffer, - # while calling self.collect_incoming_data. 
The while loop - # is necessary because we might read several data+terminator - # combos with a single recv(4096). - - while self.ac_in_buffer: - lb = len(self.ac_in_buffer) - terminator = self.get_terminator() - if not terminator: - # no terminator, collect it all - self.collect_incoming_data (self.ac_in_buffer) - self.ac_in_buffer = '' - elif isinstance(terminator, int) or isinstance(terminator, long): - # numeric terminator - n = terminator - if lb < n: - self.collect_incoming_data (self.ac_in_buffer) - self.ac_in_buffer = '' - self.terminator = self.terminator - lb - else: - self.collect_incoming_data (self.ac_in_buffer[:n]) - self.ac_in_buffer = self.ac_in_buffer[n:] - self.terminator = 0 - self.found_terminator() - else: - # 3 cases: - # 1) end of buffer matches terminator exactly: - # collect data, transition - # 2) end of buffer matches some prefix: - # collect data to the prefix - # 3) end of buffer does not match any prefix: - # collect data - terminator_len = len(terminator) - index = self.ac_in_buffer.find(terminator) - if index != -1: - # we found the terminator - if index > 0: - # don't bother reporting the empty string (source of subtle bugs) - self.collect_incoming_data (self.ac_in_buffer[:index]) - self.ac_in_buffer = self.ac_in_buffer[index+terminator_len:] - # This does the Right Thing if the terminator is changed here. 
- self.found_terminator() - else: - # check for a prefix of the terminator - index = find_prefix_at_end (self.ac_in_buffer, terminator) - if index: - if index != lb: - # we found a prefix, collect up to the prefix - self.collect_incoming_data (self.ac_in_buffer[:-index]) - self.ac_in_buffer = self.ac_in_buffer[-index:] - break - else: - # no prefix, collect it all - self.collect_incoming_data (self.ac_in_buffer) - self.ac_in_buffer = '' - - def handle_write (self): - self.initiate_send() - - def handle_close (self): - self.close() - - def push (self, data): - sabs = self.ac_out_buffer_size - if len(data) > sabs: - for i in xrange(0, len(data), sabs): - self.producer_fifo.append(data[i:i+sabs]) - else: - self.producer_fifo.append(data) - self.initiate_send() - - def push_with_producer (self, producer): - self.producer_fifo.append(producer) - self.initiate_send() - - def readable (self): - "predicate for inclusion in the readable for select()" - # cannot use the old predicate, it violates the claim of the - # set_terminator method. 
- - # return (len(self.ac_in_buffer) <= self.ac_in_buffer_size) - return 1 - - def writable (self): - "predicate for inclusion in the writable for select()" - return self.producer_fifo or (not self.connected) - - def close_when_done (self): - "automatically close this channel once the outgoing queue is empty" - self.producer_fifo.append(None) - - def initiate_send(self): - while self.producer_fifo and self.connected: - first = self.producer_fifo[0] - # handle empty string/buffer or None entry - if not first: - del self.producer_fifo[0] - if first is None: - self.handle_close() - return - - # handle classic producer behavior - obs = self.ac_out_buffer_size - try: - with catch_warnings(): - if py3kwarning: - filterwarnings("ignore", ".*buffer", DeprecationWarning) - data = buffer(first, 0, obs) - except TypeError: - data = first.more() - if data: - self.producer_fifo.appendleft(data) - else: - del self.producer_fifo[0] - continue - - # send the data - try: - num_sent = self.send(data) - except socket.error: - self.handle_error() - return - - if num_sent: - if num_sent < len(data) or obs < len(first): - self.producer_fifo[0] = first[num_sent:] - else: - del self.producer_fifo[0] - # we tried to send some actual data - return - - def discard_buffers (self): - # Emergencies only! 
- self.ac_in_buffer = '' - del self.incoming[:] - self.producer_fifo.clear() - -class simple_producer: - - def __init__ (self, data, buffer_size=512): - self.data = data - self.buffer_size = buffer_size - - def more (self): - if len (self.data) > self.buffer_size: - result = self.data[:self.buffer_size] - self.data = self.data[self.buffer_size:] - return result - else: - result = self.data - self.data = '' - return result - -class fifo: - def __init__ (self, list=None): - if not list: - self.list = deque() - else: - self.list = deque(list) - - def __len__ (self): - return len(self.list) - - def is_empty (self): - return not self.list - - def first (self): - return self.list[0] - - def push (self, data): - self.list.append(data) - - def pop (self): - if self.list: - return (1, self.list.popleft()) - else: - return (0, None) - -# Given 'haystack', see if any prefix of 'needle' is at its end. This -# assumes an exact match has already been checked. Return the number of -# characters matched. -# for example: -# f_p_a_e ("qwerty\r", "\r\n") => 1 -# f_p_a_e ("qwertydkjf", "\r\n") => 0 -# f_p_a_e ("qwerty\r\n", "\r\n") => - -# this could maybe be made faster with a computed regex? 
-# [answer: no; circa Python-2.0, Jan 2001] -# new python: 28961/s -# old python: 18307/s -# re: 12820/s -# regex: 14035/s - -def find_prefix_at_end (haystack, needle): - l = len(needle) - 1 - while l and not haystack.endswith(needle[:l]): - l -= 1 - return l diff --git a/python/Lib/asyncore.py b/python/Lib/asyncore.py deleted file mode 100755 index 29099bdf5c..0000000000 --- a/python/Lib/asyncore.py +++ /dev/null @@ -1,659 +0,0 @@ -# -*- Mode: Python -*- -# Id: asyncore.py,v 2.51 2000/09/07 22:29:26 rushing Exp -# Author: Sam Rushing - -# ====================================================================== -# Copyright 1996 by Sam Rushing -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of Sam -# Rushing not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. -# -# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN -# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# ====================================================================== - -"""Basic infrastructure for asynchronous socket service clients and servers. - -There are only two ways to have a program on a single processor do "more -than one thing at a time". 
Multi-threaded programming is the simplest and -most popular way to do it, but there is another very different technique, -that lets you have nearly all the advantages of multi-threading, without -actually using multiple threads. it's really only practical if your program -is largely I/O bound. If your program is CPU bound, then pre-emptive -scheduled threads are probably what you really need. Network servers are -rarely CPU-bound, however. - -If your operating system supports the select() system call in its I/O -library (and nearly all do), then you can use it to juggle multiple -communication channels at once; doing other work while your I/O is taking -place in the "background." Although this strategy can seem strange and -complex, especially at first, it is in many ways easier to understand and -control than multi-threaded programming. The module documented here solves -many of the difficult problems for you, making the task of building -sophisticated high-performance network servers and clients a snap. 
-""" - -import select -import socket -import sys -import time -import warnings - -import os -from errno import EALREADY, EINPROGRESS, EWOULDBLOCK, ECONNRESET, EINVAL, \ - ENOTCONN, ESHUTDOWN, EINTR, EISCONN, EBADF, ECONNABORTED, EPIPE, EAGAIN, \ - errorcode - -_DISCONNECTED = frozenset((ECONNRESET, ENOTCONN, ESHUTDOWN, ECONNABORTED, EPIPE, - EBADF)) - -try: - socket_map -except NameError: - socket_map = {} - -def _strerror(err): - try: - return os.strerror(err) - except (ValueError, OverflowError, NameError): - if err in errorcode: - return errorcode[err] - return "Unknown error %s" %err - -class ExitNow(Exception): - pass - -_reraised_exceptions = (ExitNow, KeyboardInterrupt, SystemExit) - -def read(obj): - try: - obj.handle_read_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def write(obj): - try: - obj.handle_write_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def _exception(obj): - try: - obj.handle_expt_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def readwrite(obj, flags): - try: - if flags & select.POLLIN: - obj.handle_read_event() - if flags & select.POLLOUT: - obj.handle_write_event() - if flags & select.POLLPRI: - obj.handle_expt_event() - if flags & (select.POLLHUP | select.POLLERR | select.POLLNVAL): - obj.handle_close() - except socket.error, e: - if e.args[0] not in _DISCONNECTED: - obj.handle_error() - else: - obj.handle_close() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def poll(timeout=0.0, map=None): - if map is None: - map = socket_map - if map: - r = []; w = []; e = [] - for fd, obj in map.items(): - is_r = obj.readable() - is_w = obj.writable() - if is_r: - r.append(fd) - # accepting sockets should not be writable - if is_w and not obj.accepting: - w.append(fd) - if is_r or is_w: - e.append(fd) - if [] == r == w == e: - time.sleep(timeout) - return - - try: - r, w, e = select.select(r, w, e, timeout) - except 
select.error, err: - if err.args[0] != EINTR: - raise - else: - return - - for fd in r: - obj = map.get(fd) - if obj is None: - continue - read(obj) - - for fd in w: - obj = map.get(fd) - if obj is None: - continue - write(obj) - - for fd in e: - obj = map.get(fd) - if obj is None: - continue - _exception(obj) - -def poll2(timeout=0.0, map=None): - # Use the poll() support added to the select module in Python 2.0 - if map is None: - map = socket_map - if timeout is not None: - # timeout is in milliseconds - timeout = int(timeout*1000) - pollster = select.poll() - if map: - for fd, obj in map.items(): - flags = 0 - if obj.readable(): - flags |= select.POLLIN | select.POLLPRI - # accepting sockets should not be writable - if obj.writable() and not obj.accepting: - flags |= select.POLLOUT - if flags: - # Only check for exceptions if object was either readable - # or writable. - flags |= select.POLLERR | select.POLLHUP | select.POLLNVAL - pollster.register(fd, flags) - try: - r = pollster.poll(timeout) - except select.error, err: - if err.args[0] != EINTR: - raise - r = [] - for fd, flags in r: - obj = map.get(fd) - if obj is None: - continue - readwrite(obj, flags) - -poll3 = poll2 # Alias for backward compatibility - -def loop(timeout=30.0, use_poll=False, map=None, count=None): - if map is None: - map = socket_map - - if use_poll and hasattr(select, 'poll'): - poll_fun = poll2 - else: - poll_fun = poll - - if count is None: - while map: - poll_fun(timeout, map) - - else: - while map and count > 0: - poll_fun(timeout, map) - count = count - 1 - -class dispatcher: - - debug = False - connected = False - accepting = False - connecting = False - closing = False - addr = None - ignore_log_types = frozenset(['warning']) - - def __init__(self, sock=None, map=None): - if map is None: - self._map = socket_map - else: - self._map = map - - self._fileno = None - - if sock: - # Set to nonblocking just to make sure for cases where we - # get a socket from a blocking source. 
- sock.setblocking(0) - self.set_socket(sock, map) - self.connected = True - # The constructor no longer requires that the socket - # passed be connected. - try: - self.addr = sock.getpeername() - except socket.error, err: - if err.args[0] in (ENOTCONN, EINVAL): - # To handle the case where we got an unconnected - # socket. - self.connected = False - else: - # The socket is broken in some unknown way, alert - # the user and remove it from the map (to prevent - # polling of broken sockets). - self.del_channel(map) - raise - else: - self.socket = None - - def __repr__(self): - status = [self.__class__.__module__+"."+self.__class__.__name__] - if self.accepting and self.addr: - status.append('listening') - elif self.connected: - status.append('connected') - if self.addr is not None: - try: - status.append('%s:%d' % self.addr) - except TypeError: - status.append(repr(self.addr)) - return '<%s at %#x>' % (' '.join(status), id(self)) - - __str__ = __repr__ - - def add_channel(self, map=None): - #self.log_info('adding channel %s' % self) - if map is None: - map = self._map - map[self._fileno] = self - - def del_channel(self, map=None): - fd = self._fileno - if map is None: - map = self._map - if fd in map: - #self.log_info('closing channel %d:%s' % (fd, self)) - del map[fd] - self._fileno = None - - def create_socket(self, family, type): - self.family_and_type = family, type - sock = socket.socket(family, type) - sock.setblocking(0) - self.set_socket(sock) - - def set_socket(self, sock, map=None): - self.socket = sock -## self.__dict__['socket'] = sock - self._fileno = sock.fileno() - self.add_channel(map) - - def set_reuse_addr(self): - # try to re-use a server port if possible - try: - self.socket.setsockopt( - socket.SOL_SOCKET, socket.SO_REUSEADDR, - self.socket.getsockopt(socket.SOL_SOCKET, - socket.SO_REUSEADDR) | 1 - ) - except socket.error: - pass - - # ================================================== - # predicates for select() - # these are used as filters for 
the lists of sockets - # to pass to select(). - # ================================================== - - def readable(self): - return True - - def writable(self): - return True - - # ================================================== - # socket object methods. - # ================================================== - - def listen(self, num): - self.accepting = True - if os.name == 'nt' and num > 5: - num = 5 - return self.socket.listen(num) - - def bind(self, addr): - self.addr = addr - return self.socket.bind(addr) - - def connect(self, address): - self.connected = False - self.connecting = True - err = self.socket.connect_ex(address) - if err in (EINPROGRESS, EALREADY, EWOULDBLOCK) \ - or err == EINVAL and os.name in ('nt', 'ce'): - self.addr = address - return - if err in (0, EISCONN): - self.addr = address - self.handle_connect_event() - else: - raise socket.error(err, errorcode[err]) - - def accept(self): - # XXX can return either an address pair or None - try: - conn, addr = self.socket.accept() - except TypeError: - return None - except socket.error as why: - if why.args[0] in (EWOULDBLOCK, ECONNABORTED, EAGAIN): - return None - else: - raise - else: - return conn, addr - - def send(self, data): - try: - result = self.socket.send(data) - return result - except socket.error, why: - if why.args[0] == EWOULDBLOCK: - return 0 - elif why.args[0] in _DISCONNECTED: - self.handle_close() - return 0 - else: - raise - - def recv(self, buffer_size): - try: - data = self.socket.recv(buffer_size) - if not data: - # a closed connection is indicated by signaling - # a read condition, and having recv() return 0. 
- self.handle_close() - return '' - else: - return data - except socket.error, why: - # winsock sometimes raises ENOTCONN - if why.args[0] in _DISCONNECTED: - self.handle_close() - return '' - else: - raise - - def close(self): - self.connected = False - self.accepting = False - self.connecting = False - self.del_channel() - try: - self.socket.close() - except socket.error, why: - if why.args[0] not in (ENOTCONN, EBADF): - raise - - # cheap inheritance, used to pass all other attribute - # references to the underlying socket object. - def __getattr__(self, attr): - try: - retattr = getattr(self.socket, attr) - except AttributeError: - raise AttributeError("%s instance has no attribute '%s'" - %(self.__class__.__name__, attr)) - else: - msg = "%(me)s.%(attr)s is deprecated. Use %(me)s.socket.%(attr)s " \ - "instead." % {'me': self.__class__.__name__, 'attr':attr} - warnings.warn(msg, DeprecationWarning, stacklevel=2) - return retattr - - # log and log_info may be overridden to provide more sophisticated - # logging and warning methods. In general, log is for 'hit' logging - # and 'log_info' is for informational, warning and error logging. - - def log(self, message): - sys.stderr.write('log: %s\n' % str(message)) - - def log_info(self, message, type='info'): - if type not in self.ignore_log_types: - print '%s: %s' % (type, message) - - def handle_read_event(self): - if self.accepting: - # accepting sockets are never connected, they "spawn" new - # sockets that are connected - self.handle_accept() - elif not self.connected: - if self.connecting: - self.handle_connect_event() - self.handle_read() - else: - self.handle_read() - - def handle_connect_event(self): - err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) - if err != 0: - raise socket.error(err, _strerror(err)) - self.handle_connect() - self.connected = True - self.connecting = False - - def handle_write_event(self): - if self.accepting: - # Accepting sockets shouldn't get a write event. 
- # We will pretend it didn't happen. - return - - if not self.connected: - if self.connecting: - self.handle_connect_event() - self.handle_write() - - def handle_expt_event(self): - # handle_expt_event() is called if there might be an error on the - # socket, or if there is OOB data - # check for the error condition first - err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) - if err != 0: - # we can get here when select.select() says that there is an - # exceptional condition on the socket - # since there is an error, we'll go ahead and close the socket - # like we would in a subclassed handle_read() that received no - # data - self.handle_close() - else: - self.handle_expt() - - def handle_error(self): - nil, t, v, tbinfo = compact_traceback() - - # sometimes a user repr method will crash. - try: - self_repr = repr(self) - except: - self_repr = '<__repr__(self) failed for object at %0x>' % id(self) - - self.log_info( - 'uncaptured python exception, closing channel %s (%s:%s %s)' % ( - self_repr, - t, - v, - tbinfo - ), - 'error' - ) - self.handle_close() - - def handle_expt(self): - self.log_info('unhandled incoming priority event', 'warning') - - def handle_read(self): - self.log_info('unhandled read event', 'warning') - - def handle_write(self): - self.log_info('unhandled write event', 'warning') - - def handle_connect(self): - self.log_info('unhandled connect event', 'warning') - - def handle_accept(self): - self.log_info('unhandled accept event', 'warning') - - def handle_close(self): - self.log_info('unhandled close event', 'warning') - self.close() - -# --------------------------------------------------------------------------- -# adds simple buffered output capability, useful for simple clients. 
-# [for more sophisticated usage use asynchat.async_chat] -# --------------------------------------------------------------------------- - -class dispatcher_with_send(dispatcher): - - def __init__(self, sock=None, map=None): - dispatcher.__init__(self, sock, map) - self.out_buffer = '' - - def initiate_send(self): - num_sent = 0 - num_sent = dispatcher.send(self, self.out_buffer[:512]) - self.out_buffer = self.out_buffer[num_sent:] - - def handle_write(self): - self.initiate_send() - - def writable(self): - return (not self.connected) or len(self.out_buffer) - - def send(self, data): - if self.debug: - self.log_info('sending %s' % repr(data)) - self.out_buffer = self.out_buffer + data - self.initiate_send() - -# --------------------------------------------------------------------------- -# used for debugging. -# --------------------------------------------------------------------------- - -def compact_traceback(): - t, v, tb = sys.exc_info() - tbinfo = [] - if not tb: # Must have a traceback - raise AssertionError("traceback does not exist") - while tb: - tbinfo.append(( - tb.tb_frame.f_code.co_filename, - tb.tb_frame.f_code.co_name, - str(tb.tb_lineno) - )) - tb = tb.tb_next - - # just to be safe - del tb - - file, function, line = tbinfo[-1] - info = ' '.join(['[%s|%s|%s]' % x for x in tbinfo]) - return (file, function, line), t, v, info - -def close_all(map=None, ignore_all=False): - if map is None: - map = socket_map - for x in map.values(): - try: - x.close() - except OSError, x: - if x.args[0] == EBADF: - pass - elif not ignore_all: - raise - except _reraised_exceptions: - raise - except: - if not ignore_all: - raise - map.clear() - -# Asynchronous File I/O: -# -# After a little research (reading man pages on various unixen, and -# digging through the linux kernel), I've determined that select() -# isn't meant for doing asynchronous file i/o. -# Heartening, though - reading linux/mm/filemap.c shows that linux -# supports asynchronous read-ahead. 
So _MOST_ of the time, the data -# will be sitting in memory for us already when we go to read it. -# -# What other OS's (besides NT) support async file i/o? [VMS?] -# -# Regardless, this is useful for pipes, and stdin/stdout... - -if os.name == 'posix': - import fcntl - - class file_wrapper: - # Here we override just enough to make a file - # look like a socket for the purposes of asyncore. - # The passed fd is automatically os.dup()'d - - def __init__(self, fd): - self.fd = os.dup(fd) - - def recv(self, *args): - return os.read(self.fd, *args) - - def send(self, *args): - return os.write(self.fd, *args) - - def getsockopt(self, level, optname, buflen=None): - if (level == socket.SOL_SOCKET and - optname == socket.SO_ERROR and - not buflen): - return 0 - raise NotImplementedError("Only asyncore specific behaviour " - "implemented.") - - read = recv - write = send - - def close(self): - os.close(self.fd) - - def fileno(self): - return self.fd - - class file_dispatcher(dispatcher): - - def __init__(self, fd, map=None): - dispatcher.__init__(self, None, map) - self.connected = True - try: - fd = fd.fileno() - except AttributeError: - pass - self.set_file(fd) - # set it to non-blocking mode - flags = fcntl.fcntl(fd, fcntl.F_GETFL, 0) - flags = flags | os.O_NONBLOCK - fcntl.fcntl(fd, fcntl.F_SETFL, flags) - - def set_file(self, fd): - self.socket = file_wrapper(fd) - self._fileno = self.socket.fileno() - self.add_channel() diff --git a/python/Lib/atexit.py b/python/Lib/atexit.py deleted file mode 100755 index 93fddf7f99..0000000000 --- a/python/Lib/atexit.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -atexit.py - allow programmer to define multiple exit functions to be executed -upon normal program termination. - -One public function, register, is defined. 
-""" - -__all__ = ["register"] - -import sys - -_exithandlers = [] -def _run_exitfuncs(): - """run any registered exit functions - - _exithandlers is traversed in reverse order so functions are executed - last in, first out. - """ - - exc_info = None - while _exithandlers: - func, targs, kargs = _exithandlers.pop() - try: - func(*targs, **kargs) - except SystemExit: - exc_info = sys.exc_info() - except: - import traceback - print >> sys.stderr, "Error in atexit._run_exitfuncs:" - traceback.print_exc() - exc_info = sys.exc_info() - - if exc_info is not None: - raise exc_info[0], exc_info[1], exc_info[2] - - -def register(func, *targs, **kargs): - """register a function to be executed upon normal program termination - - func - function to be called at exit - targs - optional arguments to pass to func - kargs - optional keyword arguments to pass to func - - func is returned to facilitate usage as a decorator. - """ - _exithandlers.append((func, targs, kargs)) - return func - -if hasattr(sys, "exitfunc"): - # Assume it's another registered exit function - append it to our list - register(sys.exitfunc) -sys.exitfunc = _run_exitfuncs - -if __name__ == "__main__": - def x1(): - print "running x1" - def x2(n): - print "running x2(%r)" % (n,) - def x3(n, kwd=None): - print "running x3(%r, kwd=%r)" % (n, kwd) - - register(x1) - register(x2, 12) - register(x3, 5, "bar") - register(x3, "no kwd args") diff --git a/python/Lib/audiodev.py b/python/Lib/audiodev.py deleted file mode 100755 index b6831a692f..0000000000 --- a/python/Lib/audiodev.py +++ /dev/null @@ -1,260 +0,0 @@ -"""Classes for manipulating audio devices (currently only for Sun and SGI)""" -from warnings import warnpy3k -warnpy3k("the audiodev module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -__all__ = ["error","AudioDev"] - -class error(Exception): - pass - -class Play_Audio_sgi: - # Private instance variables -## if 0: access frameratelist, nchannelslist, sampwidthlist, oldparams, \ -## 
params, config, inited_outrate, inited_width, \ -## inited_nchannels, port, converter, classinited: private - - classinited = 0 - frameratelist = nchannelslist = sampwidthlist = None - - def initclass(self): - import AL - self.frameratelist = [ - (48000, AL.RATE_48000), - (44100, AL.RATE_44100), - (32000, AL.RATE_32000), - (22050, AL.RATE_22050), - (16000, AL.RATE_16000), - (11025, AL.RATE_11025), - ( 8000, AL.RATE_8000), - ] - self.nchannelslist = [ - (1, AL.MONO), - (2, AL.STEREO), - (4, AL.QUADRO), - ] - self.sampwidthlist = [ - (1, AL.SAMPLE_8), - (2, AL.SAMPLE_16), - (3, AL.SAMPLE_24), - ] - self.classinited = 1 - - def __init__(self): - import al, AL - if not self.classinited: - self.initclass() - self.oldparams = [] - self.params = [AL.OUTPUT_RATE, 0] - self.config = al.newconfig() - self.inited_outrate = 0 - self.inited_width = 0 - self.inited_nchannels = 0 - self.converter = None - self.port = None - return - - def __del__(self): - if self.port: - self.stop() - if self.oldparams: - import al, AL - al.setparams(AL.DEFAULT_DEVICE, self.oldparams) - self.oldparams = [] - - def wait(self): - if not self.port: - return - import time - while self.port.getfilled() > 0: - time.sleep(0.1) - self.stop() - - def stop(self): - if self.port: - self.port.closeport() - self.port = None - if self.oldparams: - import al, AL - al.setparams(AL.DEFAULT_DEVICE, self.oldparams) - self.oldparams = [] - - def setoutrate(self, rate): - for (raw, cooked) in self.frameratelist: - if rate == raw: - self.params[1] = cooked - self.inited_outrate = 1 - break - else: - raise error, 'bad output rate' - - def setsampwidth(self, width): - for (raw, cooked) in self.sampwidthlist: - if width == raw: - self.config.setwidth(cooked) - self.inited_width = 1 - break - else: - if width == 0: - import AL - self.inited_width = 0 - self.config.setwidth(AL.SAMPLE_16) - self.converter = self.ulaw2lin - else: - raise error, 'bad sample width' - - def setnchannels(self, nchannels): - for (raw, cooked) in 
self.nchannelslist: - if nchannels == raw: - self.config.setchannels(cooked) - self.inited_nchannels = 1 - break - else: - raise error, 'bad # of channels' - - def writeframes(self, data): - if not (self.inited_outrate and self.inited_nchannels): - raise error, 'params not specified' - if not self.port: - import al, AL - self.port = al.openport('Python', 'w', self.config) - self.oldparams = self.params[:] - al.getparams(AL.DEFAULT_DEVICE, self.oldparams) - al.setparams(AL.DEFAULT_DEVICE, self.params) - if self.converter: - data = self.converter(data) - self.port.writesamps(data) - - def getfilled(self): - if self.port: - return self.port.getfilled() - else: - return 0 - - def getfillable(self): - if self.port: - return self.port.getfillable() - else: - return self.config.getqueuesize() - - # private methods -## if 0: access *: private - - def ulaw2lin(self, data): - import audioop - return audioop.ulaw2lin(data, 2) - -class Play_Audio_sun: -## if 0: access outrate, sampwidth, nchannels, inited_outrate, inited_width, \ -## inited_nchannels, converter: private - - def __init__(self): - self.outrate = 0 - self.sampwidth = 0 - self.nchannels = 0 - self.inited_outrate = 0 - self.inited_width = 0 - self.inited_nchannels = 0 - self.converter = None - self.port = None - return - - def __del__(self): - self.stop() - - def setoutrate(self, rate): - self.outrate = rate - self.inited_outrate = 1 - - def setsampwidth(self, width): - self.sampwidth = width - self.inited_width = 1 - - def setnchannels(self, nchannels): - self.nchannels = nchannels - self.inited_nchannels = 1 - - def writeframes(self, data): - if not (self.inited_outrate and self.inited_width and self.inited_nchannels): - raise error, 'params not specified' - if not self.port: - import sunaudiodev, SUNAUDIODEV - self.port = sunaudiodev.open('w') - info = self.port.getinfo() - info.o_sample_rate = self.outrate - info.o_channels = self.nchannels - if self.sampwidth == 0: - info.o_precision = 8 - self.o_encoding = 
SUNAUDIODEV.ENCODING_ULAW - # XXX Hack, hack -- leave defaults - else: - info.o_precision = 8 * self.sampwidth - info.o_encoding = SUNAUDIODEV.ENCODING_LINEAR - self.port.setinfo(info) - if self.converter: - data = self.converter(data) - self.port.write(data) - - def wait(self): - if not self.port: - return - self.port.drain() - self.stop() - - def stop(self): - if self.port: - self.port.flush() - self.port.close() - self.port = None - - def getfilled(self): - if self.port: - return self.port.obufcount() - else: - return 0 - -## # Nobody remembers what this method does, and it's broken. :-( -## def getfillable(self): -## return BUFFERSIZE - self.getfilled() - -def AudioDev(): - # Dynamically try to import and use a platform specific module. - try: - import al - except ImportError: - try: - import sunaudiodev - return Play_Audio_sun() - except ImportError: - try: - import Audio_mac - except ImportError: - raise error, 'no audio device' - else: - return Audio_mac.Play_Audio_mac() - else: - return Play_Audio_sgi() - -def test(fn = None): - import sys - if sys.argv[1:]: - fn = sys.argv[1] - else: - fn = 'f:just samples:just.aif' - import aifc - af = aifc.open(fn, 'r') - print fn, af.getparams() - p = AudioDev() - p.setoutrate(af.getframerate()) - p.setsampwidth(af.getsampwidth()) - p.setnchannels(af.getnchannels()) - BUFSIZ = af.getframerate()/af.getsampwidth()/af.getnchannels() - while 1: - data = af.readframes(BUFSIZ) - if not data: break - print len(data) - p.writeframes(data) - p.wait() - -if __name__ == '__main__': - test() diff --git a/python/Lib/base64.py b/python/Lib/base64.py deleted file mode 100755 index 38bc61ee98..0000000000 --- a/python/Lib/base64.py +++ /dev/null @@ -1,367 +0,0 @@ -#! 
/usr/bin/env python - -"""RFC 3548: Base16, Base32, Base64 Data Encodings""" - -# Modified 04-Oct-1995 by Jack Jansen to use binascii module -# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support - -import re -import struct -import string -import binascii - - -__all__ = [ - # Legacy interface exports traditional RFC 1521 Base64 encodings - 'encode', 'decode', 'encodestring', 'decodestring', - # Generalized interface for other encodings - 'b64encode', 'b64decode', 'b32encode', 'b32decode', - 'b16encode', 'b16decode', - # Standard Base64 encoding - 'standard_b64encode', 'standard_b64decode', - # Some common Base64 alternatives. As referenced by RFC 3458, see thread - # starting at: - # - # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html - 'urlsafe_b64encode', 'urlsafe_b64decode', - ] - -_translation = [chr(_x) for _x in range(256)] -EMPTYSTRING = '' - - -def _translate(s, altchars): - translation = _translation[:] - for k, v in altchars.items(): - translation[ord(k)] = v - return s.translate(''.join(translation)) - - - -# Base64 encoding/decoding uses binascii - -def b64encode(s, altchars=None): - """Encode a string using Base64. - - s is the string to encode. Optional altchars must be a string of at least - length 2 (additional characters are ignored) which specifies an - alternative alphabet for the '+' and '/' characters. This allows an - application to e.g. generate url or filesystem safe Base64 strings. - - The encoded string is returned. - """ - # Strip off the trailing newline - encoded = binascii.b2a_base64(s)[:-1] - if altchars is not None: - return encoded.translate(string.maketrans(b'+/', altchars[:2])) - return encoded - - -def b64decode(s, altchars=None): - """Decode a Base64 encoded string. - - s is the string to decode. Optional altchars must be a string of at least - length 2 (additional characters are ignored) which specifies the - alternative alphabet used instead of the '+' and '/' characters. 
- - The decoded string is returned. A TypeError is raised if s is - incorrectly padded. Characters that are neither in the normal base-64 - alphabet nor the alternative alphabet are discarded prior to the padding - check. - """ - if altchars is not None: - s = s.translate(string.maketrans(altchars[:2], '+/')) - try: - return binascii.a2b_base64(s) - except binascii.Error, msg: - # Transform this exception for consistency - raise TypeError(msg) - - -def standard_b64encode(s): - """Encode a string using the standard Base64 alphabet. - - s is the string to encode. The encoded string is returned. - """ - return b64encode(s) - -def standard_b64decode(s): - """Decode a string encoded with the standard Base64 alphabet. - - Argument s is the string to decode. The decoded string is returned. A - TypeError is raised if the string is incorrectly padded. Characters that - are not in the standard alphabet are discarded prior to the padding - check. - """ - return b64decode(s) - -_urlsafe_encode_translation = string.maketrans(b'+/', b'-_') -_urlsafe_decode_translation = string.maketrans(b'-_', b'+/') - -def urlsafe_b64encode(s): - """Encode a string using the URL- and filesystem-safe Base64 alphabet. - - Argument s is the string to encode. The encoded string is returned. The - alphabet uses '-' instead of '+' and '_' instead of '/'. - """ - return b64encode(s).translate(_urlsafe_encode_translation) - -def urlsafe_b64decode(s): - """Decode a string using the URL- and filesystem-safe Base64 alphabet. - - Argument s is the string to decode. The decoded string is returned. A - TypeError is raised if the string is incorrectly padded. Characters that - are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash - '/', are discarded prior to the padding check. - - The alphabet uses '-' instead of '+' and '_' instead of '/'. 
- """ - return b64decode(s.translate(_urlsafe_decode_translation)) - - - -# Base32 encoding/decoding must be done in Python -_b32alphabet = { - 0: 'A', 9: 'J', 18: 'S', 27: '3', - 1: 'B', 10: 'K', 19: 'T', 28: '4', - 2: 'C', 11: 'L', 20: 'U', 29: '5', - 3: 'D', 12: 'M', 21: 'V', 30: '6', - 4: 'E', 13: 'N', 22: 'W', 31: '7', - 5: 'F', 14: 'O', 23: 'X', - 6: 'G', 15: 'P', 24: 'Y', - 7: 'H', 16: 'Q', 25: 'Z', - 8: 'I', 17: 'R', 26: '2', - } - -_b32tab = _b32alphabet.items() -_b32tab.sort() -_b32tab = [v for k, v in _b32tab] -_b32rev = dict([(v, long(k)) for k, v in _b32alphabet.items()]) - - -def b32encode(s): - """Encode a string using Base32. - - s is the string to encode. The encoded string is returned. - """ - parts = [] - quanta, leftover = divmod(len(s), 5) - # Pad the last quantum with zero bits if necessary - if leftover: - s += ('\0' * (5 - leftover)) - quanta += 1 - for i in range(quanta): - # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this - # code is to process the 40 bits in units of 5 bits. So we take the 1 - # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover - # bits of c2 and tack them onto c3. The shifts and masks are intended - # to give us values of exactly 5 bits in width. 
- c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5]) - c2 += (c1 & 1) << 16 # 17 bits wide - c3 += (c2 & 3) << 8 # 10 bits wide - parts.extend([_b32tab[c1 >> 11], # bits 1 - 5 - _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10 - _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15 - _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5) - _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10) - _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15) - _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5) - _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5) - ]) - encoded = EMPTYSTRING.join(parts) - # Adjust for any leftover partial quanta - if leftover == 1: - return encoded[:-6] + '======' - elif leftover == 2: - return encoded[:-4] + '====' - elif leftover == 3: - return encoded[:-3] + '===' - elif leftover == 4: - return encoded[:-1] + '=' - return encoded - - -def b32decode(s, casefold=False, map01=None): - """Decode a Base32 encoded string. - - s is the string to decode. Optional casefold is a flag specifying whether - a lowercase alphabet is acceptable as input. For security purposes, the - default is False. - - RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O - (oh), and for optional mapping of the digit 1 (one) to either the letter I - (eye) or letter L (el). The optional argument map01 when not None, - specifies which letter the digit 1 should be mapped to (when map01 is not - None, the digit 0 is always mapped to the letter O). For security - purposes the default is None, so that 0 and 1 are not allowed in the - input. - - The decoded string is returned. A TypeError is raised if s were - incorrectly padded or if there are non-alphabet characters present in the - string. - """ - quanta, leftover = divmod(len(s), 8) - if leftover: - raise TypeError('Incorrect padding') - # Handle section 2.4 zero and one mapping. The flag map01 will be either - # False, or the character to map the digit 1 (one) to. It should be - # either L (el) or I (eye). 
- if map01: - s = s.translate(string.maketrans(b'01', b'O' + map01)) - if casefold: - s = s.upper() - # Strip off pad characters from the right. We need to count the pad - # characters because this will tell us how many null bytes to remove from - # the end of the decoded string. - padchars = 0 - mo = re.search('(?P[=]*)$', s) - if mo: - padchars = len(mo.group('pad')) - if padchars > 0: - s = s[:-padchars] - # Now decode the full quanta - parts = [] - acc = 0 - shift = 35 - for c in s: - val = _b32rev.get(c) - if val is None: - raise TypeError('Non-base32 digit found') - acc += _b32rev[c] << shift - shift -= 5 - if shift < 0: - parts.append(binascii.unhexlify('%010x' % acc)) - acc = 0 - shift = 35 - # Process the last, partial quanta - last = binascii.unhexlify('%010x' % acc) - if padchars == 0: - last = '' # No characters - elif padchars == 1: - last = last[:-1] - elif padchars == 3: - last = last[:-2] - elif padchars == 4: - last = last[:-3] - elif padchars == 6: - last = last[:-4] - else: - raise TypeError('Incorrect padding') - parts.append(last) - return EMPTYSTRING.join(parts) - - - -# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns -# lowercase. The RFC also recommends against accepting input case -# insensitively. -def b16encode(s): - """Encode a string using Base16. - - s is the string to encode. The encoded string is returned. - """ - return binascii.hexlify(s).upper() - - -def b16decode(s, casefold=False): - """Decode a Base16 encoded string. - - s is the string to decode. Optional casefold is a flag specifying whether - a lowercase alphabet is acceptable as input. For security purposes, the - default is False. - - The decoded string is returned. A TypeError is raised if s is - incorrectly padded or if there are non-alphabet characters present in the - string. - """ - if casefold: - s = s.upper() - if re.search('[^0-9A-F]', s): - raise TypeError('Non-base16 digit found') - return binascii.unhexlify(s) - - - -# Legacy interface. 
This code could be cleaned up since I don't believe -# binascii has any line length limitations. It just doesn't seem worth it -# though. - -MAXLINESIZE = 76 # Excluding the CRLF -MAXBINSIZE = (MAXLINESIZE//4)*3 - -def encode(input, output): - """Encode a file.""" - while True: - s = input.read(MAXBINSIZE) - if not s: - break - while len(s) < MAXBINSIZE: - ns = input.read(MAXBINSIZE-len(s)) - if not ns: - break - s += ns - line = binascii.b2a_base64(s) - output.write(line) - - -def decode(input, output): - """Decode a file.""" - while True: - line = input.readline() - if not line: - break - s = binascii.a2b_base64(line) - output.write(s) - - -def encodestring(s): - """Encode a string into multiple lines of base-64 data.""" - pieces = [] - for i in range(0, len(s), MAXBINSIZE): - chunk = s[i : i + MAXBINSIZE] - pieces.append(binascii.b2a_base64(chunk)) - return "".join(pieces) - - -def decodestring(s): - """Decode a string.""" - return binascii.a2b_base64(s) - - - -# Useable as a script... 
-def test(): - """Small test program""" - import sys, getopt - try: - opts, args = getopt.getopt(sys.argv[1:], 'deut') - except getopt.error, msg: - sys.stdout = sys.stderr - print msg - print """usage: %s [-d|-e|-u|-t] [file|-] - -d, -u: decode - -e: encode (default) - -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0] - sys.exit(2) - func = encode - for o, a in opts: - if o == '-e': func = encode - if o == '-d': func = decode - if o == '-u': func = decode - if o == '-t': test1(); return - if args and args[0] != '-': - with open(args[0], 'rb') as f: - func(f, sys.stdout) - else: - func(sys.stdin, sys.stdout) - - -def test1(): - s0 = "Aladdin:open sesame" - s1 = encodestring(s0) - s2 = decodestring(s1) - print s0, repr(s1), s2 - - -if __name__ == '__main__': - test() diff --git a/python/Lib/bdb.py b/python/Lib/bdb.py deleted file mode 100755 index 59440a99a0..0000000000 --- a/python/Lib/bdb.py +++ /dev/null @@ -1,645 +0,0 @@ -"""Debugger basics""" - -import fnmatch -import sys -import os -import types - -__all__ = ["BdbQuit","Bdb","Breakpoint"] - -class BdbQuit(Exception): - """Exception to give up completely""" - - -class Bdb: - - """Generic Python debugger base class. - - This class takes care of details of the trace facility; - a derived class should implement user interaction. - The standard debugger class (pdb.Pdb) is an example. 
- """ - - def __init__(self, skip=None): - self.skip = set(skip) if skip else None - self.breaks = {} - self.fncache = {} - self.frame_returning = None - - def canonic(self, filename): - if filename == "<" + filename[1:-1] + ">": - return filename - canonic = self.fncache.get(filename) - if not canonic: - canonic = os.path.abspath(filename) - canonic = os.path.normcase(canonic) - self.fncache[filename] = canonic - return canonic - - def reset(self): - import linecache - linecache.checkcache() - self.botframe = None - self._set_stopinfo(None, None) - - def trace_dispatch(self, frame, event, arg): - if self.quitting: - return # None - if event == 'line': - return self.dispatch_line(frame) - if event == 'call': - return self.dispatch_call(frame, arg) - if event == 'return': - return self.dispatch_return(frame, arg) - if event == 'exception': - return self.dispatch_exception(frame, arg) - if event == 'c_call': - return self.trace_dispatch - if event == 'c_exception': - return self.trace_dispatch - if event == 'c_return': - return self.trace_dispatch - print 'bdb.Bdb.dispatch: unknown debugging event:', repr(event) - return self.trace_dispatch - - def dispatch_line(self, frame): - if self.stop_here(frame) or self.break_here(frame): - self.user_line(frame) - if self.quitting: raise BdbQuit - return self.trace_dispatch - - def dispatch_call(self, frame, arg): - # XXX 'arg' is no longer used - if self.botframe is None: - # First call of dispatch since reset() - self.botframe = frame.f_back # (CT) Note that this may also be None! 
- return self.trace_dispatch - if not (self.stop_here(frame) or self.break_anywhere(frame)): - # No need to trace this function - return # None - self.user_call(frame, arg) - if self.quitting: raise BdbQuit - return self.trace_dispatch - - def dispatch_return(self, frame, arg): - if self.stop_here(frame) or frame == self.returnframe: - try: - self.frame_returning = frame - self.user_return(frame, arg) - finally: - self.frame_returning = None - if self.quitting: raise BdbQuit - return self.trace_dispatch - - def dispatch_exception(self, frame, arg): - if self.stop_here(frame): - self.user_exception(frame, arg) - if self.quitting: raise BdbQuit - return self.trace_dispatch - - # Normally derived classes don't override the following - # methods, but they may if they want to redefine the - # definition of stopping and breakpoints. - - def is_skipped_module(self, module_name): - for pattern in self.skip: - if fnmatch.fnmatch(module_name, pattern): - return True - return False - - def stop_here(self, frame): - # (CT) stopframe may now also be None, see dispatch_call. - # (CT) the former test for None is therefore removed from here. - if self.skip and \ - self.is_skipped_module(frame.f_globals.get('__name__')): - return False - if frame is self.stopframe: - if self.stoplineno == -1: - return False - return frame.f_lineno >= self.stoplineno - while frame is not None and frame is not self.stopframe: - if frame is self.botframe: - return True - frame = frame.f_back - return False - - def break_here(self, frame): - filename = self.canonic(frame.f_code.co_filename) - if not filename in self.breaks: - return False - lineno = frame.f_lineno - if not lineno in self.breaks[filename]: - # The line itself has no breakpoint, but maybe the line is the - # first line of a function with breakpoint set by function name. - lineno = frame.f_code.co_firstlineno - if not lineno in self.breaks[filename]: - return False - - # flag says ok to delete temp. 
bp - (bp, flag) = effective(filename, lineno, frame) - if bp: - self.currentbp = bp.number - if (flag and bp.temporary): - self.do_clear(str(bp.number)) - return True - else: - return False - - def do_clear(self, arg): - raise NotImplementedError, "subclass of bdb must implement do_clear()" - - def break_anywhere(self, frame): - return self.canonic(frame.f_code.co_filename) in self.breaks - - # Derived classes should override the user_* methods - # to gain control. - - def user_call(self, frame, argument_list): - """This method is called when there is the remote possibility - that we ever need to stop in this function.""" - pass - - def user_line(self, frame): - """This method is called when we stop or break at this line.""" - pass - - def user_return(self, frame, return_value): - """This method is called when a return trap is set here.""" - pass - - def user_exception(self, frame, exc_info): - exc_type, exc_value, exc_traceback = exc_info - """This method is called if an exception occurs, - but only if we are to stop at or just below this level.""" - pass - - def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): - self.stopframe = stopframe - self.returnframe = returnframe - self.quitting = 0 - # stoplineno >= 0 means: stop at line >= the stoplineno - # stoplineno -1 means: don't stop at all - self.stoplineno = stoplineno - - # Derived classes and clients can call the following methods - # to affect the stepping state. - - def set_until(self, frame): #the name "until" is borrowed from gdb - """Stop when the line with the line no greater than the current one is - reached or when returning from current frame""" - self._set_stopinfo(frame, frame, frame.f_lineno+1) - - def set_step(self): - """Stop after one line of code.""" - # Issue #13183: pdb skips frames after hitting a breakpoint and running - # step commands. - # Restore the trace function in the caller (that may not have been set - # for performance reasons) when returning from the current frame. 
- if self.frame_returning: - caller_frame = self.frame_returning.f_back - if caller_frame and not caller_frame.f_trace: - caller_frame.f_trace = self.trace_dispatch - self._set_stopinfo(None, None) - - def set_next(self, frame): - """Stop on the next line in or below the given frame.""" - self._set_stopinfo(frame, None) - - def set_return(self, frame): - """Stop when returning from the given frame.""" - self._set_stopinfo(frame.f_back, frame) - - def set_trace(self, frame=None): - """Start debugging from `frame`. - - If frame is not specified, debugging starts from caller's frame. - """ - if frame is None: - frame = sys._getframe().f_back - self.reset() - while frame: - frame.f_trace = self.trace_dispatch - self.botframe = frame - frame = frame.f_back - self.set_step() - sys.settrace(self.trace_dispatch) - - def set_continue(self): - # Don't stop except at breakpoints or when finished - self._set_stopinfo(self.botframe, None, -1) - if not self.breaks: - # no breakpoints; run without debugger overhead - sys.settrace(None) - frame = sys._getframe().f_back - while frame and frame is not self.botframe: - del frame.f_trace - frame = frame.f_back - - def set_quit(self): - self.stopframe = self.botframe - self.returnframe = None - self.quitting = 1 - sys.settrace(None) - - # Derived classes and clients can call the following methods - # to manipulate breakpoints. These methods return an - # error message is something went wrong, None if all is well. - # Set_break prints out the breakpoint line and file:lineno. - # Call self.get_*break*() to see the breakpoints or better - # for bp in Breakpoint.bpbynumber: if bp: bp.bpprint(). 
- - def set_break(self, filename, lineno, temporary=0, cond = None, - funcname=None): - filename = self.canonic(filename) - import linecache # Import as late as possible - line = linecache.getline(filename, lineno) - if not line: - return 'Line %s:%d does not exist' % (filename, - lineno) - if not filename in self.breaks: - self.breaks[filename] = [] - list = self.breaks[filename] - if not lineno in list: - list.append(lineno) - bp = Breakpoint(filename, lineno, temporary, cond, funcname) - - def _prune_breaks(self, filename, lineno): - if (filename, lineno) not in Breakpoint.bplist: - self.breaks[filename].remove(lineno) - if not self.breaks[filename]: - del self.breaks[filename] - - def clear_break(self, filename, lineno): - filename = self.canonic(filename) - if not filename in self.breaks: - return 'There are no breakpoints in %s' % filename - if lineno not in self.breaks[filename]: - return 'There is no breakpoint at %s:%d' % (filename, - lineno) - # If there's only one bp in the list for that file,line - # pair, then remove the breaks entry - for bp in Breakpoint.bplist[filename, lineno][:]: - bp.deleteMe() - self._prune_breaks(filename, lineno) - - def clear_bpbynumber(self, arg): - try: - number = int(arg) - except: - return 'Non-numeric breakpoint number (%s)' % arg - try: - bp = Breakpoint.bpbynumber[number] - except IndexError: - return 'Breakpoint number (%d) out of range' % number - if not bp: - return 'Breakpoint (%d) already deleted' % number - bp.deleteMe() - self._prune_breaks(bp.file, bp.line) - - def clear_all_file_breaks(self, filename): - filename = self.canonic(filename) - if not filename in self.breaks: - return 'There are no breakpoints in %s' % filename - for line in self.breaks[filename]: - blist = Breakpoint.bplist[filename, line] - for bp in blist: - bp.deleteMe() - del self.breaks[filename] - - def clear_all_breaks(self): - if not self.breaks: - return 'There are no breakpoints' - for bp in Breakpoint.bpbynumber: - if bp: - 
bp.deleteMe() - self.breaks = {} - - def get_break(self, filename, lineno): - filename = self.canonic(filename) - return filename in self.breaks and \ - lineno in self.breaks[filename] - - def get_breaks(self, filename, lineno): - filename = self.canonic(filename) - return filename in self.breaks and \ - lineno in self.breaks[filename] and \ - Breakpoint.bplist[filename, lineno] or [] - - def get_file_breaks(self, filename): - filename = self.canonic(filename) - if filename in self.breaks: - return self.breaks[filename] - else: - return [] - - def get_all_breaks(self): - return self.breaks - - # Derived classes and clients can call the following method - # to get a data structure representing a stack trace. - - def get_stack(self, f, t): - stack = [] - if t and t.tb_frame is f: - t = t.tb_next - while f is not None: - stack.append((f, f.f_lineno)) - if f is self.botframe: - break - f = f.f_back - stack.reverse() - i = max(0, len(stack) - 1) - while t is not None: - stack.append((t.tb_frame, t.tb_lineno)) - t = t.tb_next - if f is None: - i = max(0, len(stack) - 1) - return stack, i - - # - - def format_stack_entry(self, frame_lineno, lprefix=': '): - import linecache, repr - frame, lineno = frame_lineno - filename = self.canonic(frame.f_code.co_filename) - s = '%s(%r)' % (filename, lineno) - if frame.f_code.co_name: - s = s + frame.f_code.co_name - else: - s = s + "" - if '__args__' in frame.f_locals: - args = frame.f_locals['__args__'] - else: - args = None - if args: - s = s + repr.repr(args) - else: - s = s + '()' - if '__return__' in frame.f_locals: - rv = frame.f_locals['__return__'] - s = s + '->' - s = s + repr.repr(rv) - line = linecache.getline(filename, lineno, frame.f_globals) - if line: s = s + lprefix + line.strip() - return s - - # The following two methods can be called by clients to use - # a debugger to debug a statement, given as a string. 
- - def run(self, cmd, globals=None, locals=None): - if globals is None: - import __main__ - globals = __main__.__dict__ - if locals is None: - locals = globals - self.reset() - sys.settrace(self.trace_dispatch) - if not isinstance(cmd, types.CodeType): - cmd = cmd+'\n' - try: - exec cmd in globals, locals - except BdbQuit: - pass - finally: - self.quitting = 1 - sys.settrace(None) - - def runeval(self, expr, globals=None, locals=None): - if globals is None: - import __main__ - globals = __main__.__dict__ - if locals is None: - locals = globals - self.reset() - sys.settrace(self.trace_dispatch) - if not isinstance(expr, types.CodeType): - expr = expr+'\n' - try: - return eval(expr, globals, locals) - except BdbQuit: - pass - finally: - self.quitting = 1 - sys.settrace(None) - - def runctx(self, cmd, globals, locals): - # B/W compatibility - self.run(cmd, globals, locals) - - # This method is more useful to debug a single function call. - - def runcall(self, func, *args, **kwds): - self.reset() - sys.settrace(self.trace_dispatch) - res = None - try: - res = func(*args, **kwds) - except BdbQuit: - pass - finally: - self.quitting = 1 - sys.settrace(None) - return res - - -def set_trace(): - Bdb().set_trace() - - -class Breakpoint: - - """Breakpoint class - - Implements temporary breakpoints, ignore counts, disabling and - (re)-enabling, and conditionals. - - Breakpoints are indexed by number through bpbynumber and by - the file,line tuple using bplist. The former points to a - single instance of class Breakpoint. The latter points to a - list of such instances since there may be more than one - breakpoint per line. - - """ - - # XXX Keeping state in the class is a mistake -- this means - # you cannot have more than one active Bdb instance. - - next = 1 # Next bp to be assigned - bplist = {} # indexed by (file, lineno) tuple - bpbynumber = [None] # Each entry is None or an instance of Bpt - # index 0 is unused, except for marking an - # effective break .... 
see effective() - - def __init__(self, file, line, temporary=0, cond=None, funcname=None): - self.funcname = funcname - # Needed if funcname is not None. - self.func_first_executable_line = None - self.file = file # This better be in canonical form! - self.line = line - self.temporary = temporary - self.cond = cond - self.enabled = 1 - self.ignore = 0 - self.hits = 0 - self.number = Breakpoint.next - Breakpoint.next = Breakpoint.next + 1 - # Build the two lists - self.bpbynumber.append(self) - if (file, line) in self.bplist: - self.bplist[file, line].append(self) - else: - self.bplist[file, line] = [self] - - - def deleteMe(self): - index = (self.file, self.line) - self.bpbynumber[self.number] = None # No longer in list - self.bplist[index].remove(self) - if not self.bplist[index]: - # No more bp for this f:l combo - del self.bplist[index] - - def enable(self): - self.enabled = 1 - - def disable(self): - self.enabled = 0 - - def bpprint(self, out=None): - if out is None: - out = sys.stdout - if self.temporary: - disp = 'del ' - else: - disp = 'keep ' - if self.enabled: - disp = disp + 'yes ' - else: - disp = disp + 'no ' - print >>out, '%-4dbreakpoint %s at %s:%d' % (self.number, disp, - self.file, self.line) - if self.cond: - print >>out, '\tstop only if %s' % (self.cond,) - if self.ignore: - print >>out, '\tignore next %d hits' % (self.ignore) - if (self.hits): - if (self.hits > 1): ss = 's' - else: ss = '' - print >>out, ('\tbreakpoint already hit %d time%s' % - (self.hits, ss)) - -# -----------end of Breakpoint class---------- - -def checkfuncname(b, frame): - """Check whether we should break here because of `b.funcname`.""" - if not b.funcname: - # Breakpoint was set via line number. - if b.line != frame.f_lineno: - # Breakpoint was set at a line with a def statement and the function - # defined is called: don't break. - return False - return True - - # Breakpoint set via function name. 
- - if frame.f_code.co_name != b.funcname: - # It's not a function call, but rather execution of def statement. - return False - - # We are in the right frame. - if not b.func_first_executable_line: - # The function is entered for the 1st time. - b.func_first_executable_line = frame.f_lineno - - if b.func_first_executable_line != frame.f_lineno: - # But we are not at the first line number: don't break. - return False - return True - -# Determines if there is an effective (active) breakpoint at this -# line of code. Returns breakpoint number or 0 if none -def effective(file, line, frame): - """Determine which breakpoint for this file:line is to be acted upon. - - Called only if we know there is a bpt at this - location. Returns breakpoint that was triggered and a flag - that indicates if it is ok to delete a temporary bp. - - """ - possibles = Breakpoint.bplist[file,line] - for i in range(0, len(possibles)): - b = possibles[i] - if b.enabled == 0: - continue - if not checkfuncname(b, frame): - continue - # Count every hit when bp is enabled - b.hits = b.hits + 1 - if not b.cond: - # If unconditional, and ignoring, - # go on to next, else break - if b.ignore > 0: - b.ignore = b.ignore -1 - continue - else: - # breakpoint and marker that's ok - # to delete if temporary - return (b,1) - else: - # Conditional bp. - # Ignore count applies only to those bpt hits where the - # condition evaluates to true. - try: - val = eval(b.cond, frame.f_globals, - frame.f_locals) - if val: - if b.ignore > 0: - b.ignore = b.ignore -1 - # continue - else: - return (b,1) - # else: - # continue - except: - # if eval fails, most conservative - # thing is to stop on breakpoint - # regardless of ignore count. - # Don't delete temporary, - # as another hint to user. - return (b,0) - return (None, None) - -# -------------------- testing -------------------- - -class Tdb(Bdb): - def user_call(self, frame, args): - name = frame.f_code.co_name - if not name: name = '???' 
- print '+++ call', name, args - def user_line(self, frame): - import linecache - name = frame.f_code.co_name - if not name: name = '???' - fn = self.canonic(frame.f_code.co_filename) - line = linecache.getline(fn, frame.f_lineno, frame.f_globals) - print '+++', fn, frame.f_lineno, name, ':', line.strip() - def user_return(self, frame, retval): - print '+++ return', retval - def user_exception(self, frame, exc_stuff): - print '+++ exception', exc_stuff - self.set_continue() - -def foo(n): - print 'foo(', n, ')' - x = bar(n*10) - print 'bar returned', x - -def bar(a): - print 'bar(', a, ')' - return a/2 - -def test(): - t = Tdb() - t.run('import bdb; bdb.foo(10)') - -# end diff --git a/python/Lib/binhex.py b/python/Lib/binhex.py deleted file mode 100755 index 14ec233752..0000000000 --- a/python/Lib/binhex.py +++ /dev/null @@ -1,518 +0,0 @@ -"""Macintosh binhex compression/decompression. - -easy interface: -binhex(inputfilename, outputfilename) -hexbin(inputfilename, outputfilename) -""" - -# -# Jack Jansen, CWI, August 1995. -# -# The module is supposed to be as compatible as possible. Especially the -# easy interface should work "as expected" on any platform. -# XXXX Note: currently, textfiles appear in mac-form on all platforms. -# We seem to lack a simple character-translate in python. -# (we should probably use ISO-Latin-1 on all but the mac platform). -# XXXX The simple routines are too simple: they expect to hold the complete -# files in-core. Should be fixed. -# XXXX It would be nice to handle AppleDouble format on unix -# (for servers serving macs). -# XXXX I don't understand what happens when you get 0x90 times the same byte on -# input. The resulting code (xx 90 90) would appear to be interpreted as an -# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety... 
-# -import sys -import os -import struct -import binascii - -__all__ = ["binhex","hexbin","Error"] - -class Error(Exception): - pass - -# States (what have we written) -_DID_HEADER = 0 -_DID_DATA = 1 - -# Various constants -REASONABLY_LARGE=32768 # Minimal amount we pass the rle-coder -LINELEN=64 -RUNCHAR=chr(0x90) # run-length introducer - -# -# This code is no longer byte-order dependent - -# -# Workarounds for non-mac machines. -try: - from Carbon.File import FSSpec, FInfo - from MacOS import openrf - - def getfileinfo(name): - finfo = FSSpec(name).FSpGetFInfo() - dir, file = os.path.split(name) - # XXX Get resource/data sizes - fp = open(name, 'rb') - fp.seek(0, 2) - dlen = fp.tell() - fp = openrf(name, '*rb') - fp.seek(0, 2) - rlen = fp.tell() - return file, finfo, dlen, rlen - - def openrsrc(name, *mode): - if not mode: - mode = '*rb' - else: - mode = '*' + mode[0] - return openrf(name, mode) - -except ImportError: - # - # Glue code for non-macintosh usage - # - - class FInfo: - def __init__(self): - self.Type = '????' - self.Creator = '????' 
- self.Flags = 0 - - def getfileinfo(name): - finfo = FInfo() - # Quick check for textfile - fp = open(name) - data = open(name).read(256) - for c in data: - if not c.isspace() and (c<' ' or ord(c) > 0x7f): - break - else: - finfo.Type = 'TEXT' - fp.seek(0, 2) - dsize = fp.tell() - fp.close() - dir, file = os.path.split(name) - file = file.replace(':', '-', 1) - return file, finfo, dsize, 0 - - class openrsrc: - def __init__(self, *args): - pass - - def read(self, *args): - return '' - - def write(self, *args): - pass - - def close(self): - pass - -class _Hqxcoderengine: - """Write data to the coder in 3-byte chunks""" - - def __init__(self, ofp): - self.ofp = ofp - self.data = '' - self.hqxdata = '' - self.linelen = LINELEN-1 - - def write(self, data): - self.data = self.data + data - datalen = len(self.data) - todo = (datalen//3)*3 - data = self.data[:todo] - self.data = self.data[todo:] - if not data: - return - self.hqxdata = self.hqxdata + binascii.b2a_hqx(data) - self._flush(0) - - def _flush(self, force): - first = 0 - while first <= len(self.hqxdata)-self.linelen: - last = first + self.linelen - self.ofp.write(self.hqxdata[first:last]+'\n') - self.linelen = LINELEN - first = last - self.hqxdata = self.hqxdata[first:] - if force: - self.ofp.write(self.hqxdata + ':\n') - - def close(self): - if self.data: - self.hqxdata = \ - self.hqxdata + binascii.b2a_hqx(self.data) - self._flush(1) - self.ofp.close() - del self.ofp - -class _Rlecoderengine: - """Write data to the RLE-coder in suitably large chunks""" - - def __init__(self, ofp): - self.ofp = ofp - self.data = '' - - def write(self, data): - self.data = self.data + data - if len(self.data) < REASONABLY_LARGE: - return - rledata = binascii.rlecode_hqx(self.data) - self.ofp.write(rledata) - self.data = '' - - def close(self): - if self.data: - rledata = binascii.rlecode_hqx(self.data) - self.ofp.write(rledata) - self.ofp.close() - del self.ofp - -class BinHex: - def __init__(self, name_finfo_dlen_rlen, ofp): 
- name, finfo, dlen, rlen = name_finfo_dlen_rlen - if type(ofp) == type(''): - ofname = ofp - ofp = open(ofname, 'w') - ofp.write('(This file must be converted with BinHex 4.0)\n\n:') - hqxer = _Hqxcoderengine(ofp) - self.ofp = _Rlecoderengine(hqxer) - self.crc = 0 - if finfo is None: - finfo = FInfo() - self.dlen = dlen - self.rlen = rlen - self._writeinfo(name, finfo) - self.state = _DID_HEADER - - def _writeinfo(self, name, finfo): - nl = len(name) - if nl > 63: - raise Error, 'Filename too long' - d = chr(nl) + name + '\0' - d2 = finfo.Type + finfo.Creator - - # Force all structs to be packed with big-endian - d3 = struct.pack('>h', finfo.Flags) - d4 = struct.pack('>ii', self.dlen, self.rlen) - info = d + d2 + d3 + d4 - self._write(info) - self._writecrc() - - def _write(self, data): - self.crc = binascii.crc_hqx(data, self.crc) - self.ofp.write(data) - - def _writecrc(self): - # XXXX Should this be here?? - # self.crc = binascii.crc_hqx('\0\0', self.crc) - if self.crc < 0: - fmt = '>h' - else: - fmt = '>H' - self.ofp.write(struct.pack(fmt, self.crc)) - self.crc = 0 - - def write(self, data): - if self.state != _DID_HEADER: - raise Error, 'Writing data at the wrong time' - self.dlen = self.dlen - len(data) - self._write(data) - - def close_data(self): - if self.dlen != 0: - raise Error, 'Incorrect data size, diff=%r' % (self.rlen,) - self._writecrc() - self.state = _DID_DATA - - def write_rsrc(self, data): - if self.state < _DID_DATA: - self.close_data() - if self.state != _DID_DATA: - raise Error, 'Writing resource data at the wrong time' - self.rlen = self.rlen - len(data) - self._write(data) - - def close(self): - if self.state is None: - return - try: - if self.state < _DID_DATA: - self.close_data() - if self.state != _DID_DATA: - raise Error, 'Close at the wrong time' - if self.rlen != 0: - raise Error, \ - "Incorrect resource-datasize, diff=%r" % (self.rlen,) - self._writecrc() - finally: - self.state = None - ofp = self.ofp - del self.ofp - ofp.close() - 
-def binhex(inp, out): - """(infilename, outfilename) - Create binhex-encoded copy of a file""" - finfo = getfileinfo(inp) - ofp = BinHex(finfo, out) - - ifp = open(inp, 'rb') - # XXXX Do textfile translation on non-mac systems - while 1: - d = ifp.read(128000) - if not d: break - ofp.write(d) - ofp.close_data() - ifp.close() - - ifp = openrsrc(inp, 'rb') - while 1: - d = ifp.read(128000) - if not d: break - ofp.write_rsrc(d) - ofp.close() - ifp.close() - -class _Hqxdecoderengine: - """Read data via the decoder in 4-byte chunks""" - - def __init__(self, ifp): - self.ifp = ifp - self.eof = 0 - - def read(self, totalwtd): - """Read at least wtd bytes (or until EOF)""" - decdata = '' - wtd = totalwtd - # - # The loop here is convoluted, since we don't really now how - # much to decode: there may be newlines in the incoming data. - while wtd > 0: - if self.eof: return decdata - wtd = ((wtd+2)//3)*4 - data = self.ifp.read(wtd) - # - # Next problem: there may not be a complete number of - # bytes in what we pass to a2b. Solve by yet another - # loop. 
- # - while 1: - try: - decdatacur, self.eof = \ - binascii.a2b_hqx(data) - break - except binascii.Incomplete: - pass - newdata = self.ifp.read(1) - if not newdata: - raise Error, \ - 'Premature EOF on binhex file' - data = data + newdata - decdata = decdata + decdatacur - wtd = totalwtd - len(decdata) - if not decdata and not self.eof: - raise Error, 'Premature EOF on binhex file' - return decdata - - def close(self): - self.ifp.close() - -class _Rledecoderengine: - """Read data via the RLE-coder""" - - def __init__(self, ifp): - self.ifp = ifp - self.pre_buffer = '' - self.post_buffer = '' - self.eof = 0 - - def read(self, wtd): - if wtd > len(self.post_buffer): - self._fill(wtd-len(self.post_buffer)) - rv = self.post_buffer[:wtd] - self.post_buffer = self.post_buffer[wtd:] - return rv - - def _fill(self, wtd): - self.pre_buffer = self.pre_buffer + self.ifp.read(wtd+4) - if self.ifp.eof: - self.post_buffer = self.post_buffer + \ - binascii.rledecode_hqx(self.pre_buffer) - self.pre_buffer = '' - return - - # - # Obfuscated code ahead. We have to take care that we don't - # end up with an orphaned RUNCHAR later on. So, we keep a couple - # of bytes in the buffer, depending on what the end of - # the buffer looks like: - # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0) - # '?\220' - Keep 2 bytes: repeated something-else - # '\220\0' - Escaped \220: Keep 2 bytes. - # '?\220?' - Complete repeat sequence: decode all - # otherwise: keep 1 byte. 
- # - mark = len(self.pre_buffer) - if self.pre_buffer[-3:] == RUNCHAR + '\0' + RUNCHAR: - mark = mark - 3 - elif self.pre_buffer[-1] == RUNCHAR: - mark = mark - 2 - elif self.pre_buffer[-2:] == RUNCHAR + '\0': - mark = mark - 2 - elif self.pre_buffer[-2] == RUNCHAR: - pass # Decode all - else: - mark = mark - 1 - - self.post_buffer = self.post_buffer + \ - binascii.rledecode_hqx(self.pre_buffer[:mark]) - self.pre_buffer = self.pre_buffer[mark:] - - def close(self): - self.ifp.close() - -class HexBin: - def __init__(self, ifp): - if type(ifp) == type(''): - ifp = open(ifp) - # - # Find initial colon. - # - while 1: - ch = ifp.read(1) - if not ch: - raise Error, "No binhex data found" - # Cater for \r\n terminated lines (which show up as \n\r, hence - # all lines start with \r) - if ch == '\r': - continue - if ch == ':': - break - if ch != '\n': - dummy = ifp.readline() - - hqxifp = _Hqxdecoderengine(ifp) - self.ifp = _Rledecoderengine(hqxifp) - self.crc = 0 - self._readheader() - - def _read(self, len): - data = self.ifp.read(len) - self.crc = binascii.crc_hqx(data, self.crc) - return data - - def _checkcrc(self): - filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff - #self.crc = binascii.crc_hqx('\0\0', self.crc) - # XXXX Is this needed?? 
- self.crc = self.crc & 0xffff - if filecrc != self.crc: - raise Error, 'CRC error, computed %x, read %x' \ - %(self.crc, filecrc) - self.crc = 0 - - def _readheader(self): - len = self._read(1) - fname = self._read(ord(len)) - rest = self._read(1+4+4+2+4+4) - self._checkcrc() - - type = rest[1:5] - creator = rest[5:9] - flags = struct.unpack('>h', rest[9:11])[0] - self.dlen = struct.unpack('>l', rest[11:15])[0] - self.rlen = struct.unpack('>l', rest[15:19])[0] - - self.FName = fname - self.FInfo = FInfo() - self.FInfo.Creator = creator - self.FInfo.Type = type - self.FInfo.Flags = flags - - self.state = _DID_HEADER - - def read(self, *n): - if self.state != _DID_HEADER: - raise Error, 'Read data at wrong time' - if n: - n = n[0] - n = min(n, self.dlen) - else: - n = self.dlen - rv = '' - while len(rv) < n: - rv = rv + self._read(n-len(rv)) - self.dlen = self.dlen - n - return rv - - def close_data(self): - if self.state != _DID_HEADER: - raise Error, 'close_data at wrong time' - if self.dlen: - dummy = self._read(self.dlen) - self._checkcrc() - self.state = _DID_DATA - - def read_rsrc(self, *n): - if self.state == _DID_HEADER: - self.close_data() - if self.state != _DID_DATA: - raise Error, 'Read resource data at wrong time' - if n: - n = n[0] - n = min(n, self.rlen) - else: - n = self.rlen - self.rlen = self.rlen - n - return self._read(n) - - def close(self): - if self.state is None: - return - try: - if self.rlen: - dummy = self.read_rsrc(self.rlen) - self._checkcrc() - finally: - self.state = None - self.ifp.close() - -def hexbin(inp, out): - """(infilename, outfilename) - Decode binhexed file""" - ifp = HexBin(inp) - finfo = ifp.FInfo - if not out: - out = ifp.FName - - ofp = open(out, 'wb') - # XXXX Do translation on non-mac systems - while 1: - d = ifp.read(128000) - if not d: break - ofp.write(d) - ofp.close() - ifp.close_data() - - d = ifp.read_rsrc(128000) - if d: - ofp = openrsrc(out, 'wb') - ofp.write(d) - while 1: - d = ifp.read_rsrc(128000) - if not 
d: break - ofp.write(d) - ofp.close() - - ifp.close() - -def _test(): - fname = sys.argv[1] - binhex(fname, fname+'.hqx') - hexbin(fname+'.hqx', fname+'.viahqx') - #hexbin(fname, fname+'.unpacked') - sys.exit(1) - -if __name__ == '__main__': - _test() diff --git a/python/Lib/bisect.py b/python/Lib/bisect.py deleted file mode 100755 index 4a4d05255e..0000000000 --- a/python/Lib/bisect.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Bisection algorithms.""" - -def insort_right(a, x, lo=0, hi=None): - """Insert item x in list a, and keep it sorted assuming a is sorted. - - If x is already in a, insert it to the right of the rightmost x. - - Optional args lo (default 0) and hi (default len(a)) bound the - slice of a to be searched. - """ - - if lo < 0: - raise ValueError('lo must be non-negative') - if hi is None: - hi = len(a) - while lo < hi: - mid = (lo+hi)//2 - if x < a[mid]: hi = mid - else: lo = mid+1 - a.insert(lo, x) - -insort = insort_right # backward compatibility - -def bisect_right(a, x, lo=0, hi=None): - """Return the index where to insert item x in list a, assuming a is sorted. - - The return value i is such that all e in a[:i] have e <= x, and all e in - a[i:] have e > x. So if x already appears in the list, a.insert(x) will - insert just after the rightmost x already there. - - Optional args lo (default 0) and hi (default len(a)) bound the - slice of a to be searched. - """ - - if lo < 0: - raise ValueError('lo must be non-negative') - if hi is None: - hi = len(a) - while lo < hi: - mid = (lo+hi)//2 - if x < a[mid]: hi = mid - else: lo = mid+1 - return lo - -bisect = bisect_right # backward compatibility - -def insort_left(a, x, lo=0, hi=None): - """Insert item x in list a, and keep it sorted assuming a is sorted. - - If x is already in a, insert it to the left of the leftmost x. - - Optional args lo (default 0) and hi (default len(a)) bound the - slice of a to be searched. 
- """ - - if lo < 0: - raise ValueError('lo must be non-negative') - if hi is None: - hi = len(a) - while lo < hi: - mid = (lo+hi)//2 - if a[mid] < x: lo = mid+1 - else: hi = mid - a.insert(lo, x) - - -def bisect_left(a, x, lo=0, hi=None): - """Return the index where to insert item x in list a, assuming a is sorted. - - The return value i is such that all e in a[:i] have e < x, and all e in - a[i:] have e >= x. So if x already appears in the list, a.insert(x) will - insert just before the leftmost x already there. - - Optional args lo (default 0) and hi (default len(a)) bound the - slice of a to be searched. - """ - - if lo < 0: - raise ValueError('lo must be non-negative') - if hi is None: - hi = len(a) - while lo < hi: - mid = (lo+hi)//2 - if a[mid] < x: lo = mid+1 - else: hi = mid - return lo - -# Overwrite above definitions with a fast C implementation -try: - from _bisect import * -except ImportError: - pass diff --git a/python/Lib/bsddb/__init__.py b/python/Lib/bsddb/__init__.py deleted file mode 100755 index 13c9c27ae8..0000000000 --- a/python/Lib/bsddb/__init__.py +++ /dev/null @@ -1,455 +0,0 @@ -#---------------------------------------------------------------------- -# Copyright (c) 1999-2001, Digital Creations, Fredericksburg, VA, USA -# and Andrew Kuchling. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# o Redistributions of source code must retain the above copyright -# notice, this list of conditions, and the disclaimer that follows. -# -# o Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions, and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. 
-# -# o Neither the name of Digital Creations nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS AND CONTRIBUTORS *AS -# IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL -# CREATIONS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -# DAMAGE. -#---------------------------------------------------------------------- - - -"""Support for Berkeley DB 4.3 through 5.3 with a simple interface. - -For the full featured object oriented interface use the bsddb.db module -instead. It mirrors the Oracle Berkeley DB C API. -""" - -import sys -absolute_import = (sys.version_info[0] >= 3) - -if (sys.version_info >= (2, 6)) and (sys.version_info < (3, 0)) : - import warnings - if sys.py3kwarning and (__name__ != 'bsddb3') : - warnings.warnpy3k("in 3.x, the bsddb module has been removed; " - "please use the pybsddb project instead", - DeprecationWarning, 2) - warnings.filterwarnings("ignore", ".*CObject.*", DeprecationWarning, - "bsddb.__init__") - -try: - if __name__ == 'bsddb3': - # import _pybsddb binary as it should be the more recent version from - # a standalone pybsddb addon package than the version included with - # python as bsddb._bsddb. - if absolute_import : - # Because this syntaxis is not valid before Python 2.5 - exec("from . 
import _pybsddb") - else : - import _pybsddb - _bsddb = _pybsddb - from bsddb3.dbutils import DeadlockWrap as _DeadlockWrap - else: - import _bsddb - from bsddb.dbutils import DeadlockWrap as _DeadlockWrap -except ImportError: - # Remove ourselves from sys.modules - import sys - del sys.modules[__name__] - raise - -# bsddb3 calls it db, but provide _db for backwards compatibility -db = _db = _bsddb -__version__ = db.__version__ - -error = db.DBError # So bsddb.error will mean something... - -#---------------------------------------------------------------------- - -import sys, os - -from weakref import ref - -if sys.version_info < (2, 6) : - import UserDict - MutableMapping = UserDict.DictMixin -else : - import collections - MutableMapping = collections.MutableMapping - -class _iter_mixin(MutableMapping): - def _make_iter_cursor(self): - cur = _DeadlockWrap(self.db.cursor) - key = id(cur) - self._cursor_refs[key] = ref(cur, self._gen_cref_cleaner(key)) - return cur - - def _gen_cref_cleaner(self, key): - # use generate the function for the weakref callback here - # to ensure that we do not hold a strict reference to cur - # in the callback. - return lambda ref: self._cursor_refs.pop(key, None) - - def __iter__(self): - self._kill_iteration = False - self._in_iter += 1 - try: - try: - cur = self._make_iter_cursor() - - # FIXME-20031102-greg: race condition. cursor could - # be closed by another thread before this call. - - # since we're only returning keys, we call the cursor - # methods with flags=0, dlen=0, dofs=0 - key = _DeadlockWrap(cur.first, 0,0,0)[0] - yield key - - next = getattr(cur, "next") - while 1: - try: - key = _DeadlockWrap(next, 0,0,0)[0] - yield key - except _bsddb.DBCursorClosedError: - if self._kill_iteration: - raise RuntimeError('Database changed size ' - 'during iteration.') - cur = self._make_iter_cursor() - # FIXME-20031101-greg: race condition. cursor could - # be closed by another thread before this call. 
- _DeadlockWrap(cur.set, key,0,0,0) - next = getattr(cur, "next") - except _bsddb.DBNotFoundError: - pass - except _bsddb.DBCursorClosedError: - # the database was modified during iteration. abort. - pass -# When Python 2.4 not supported in bsddb3, we can change this to "finally" - except : - self._in_iter -= 1 - raise - - self._in_iter -= 1 - - def iteritems(self): - if not self.db: - return - self._kill_iteration = False - self._in_iter += 1 - try: - try: - cur = self._make_iter_cursor() - - # FIXME-20031102-greg: race condition. cursor could - # be closed by another thread before this call. - - kv = _DeadlockWrap(cur.first) - key = kv[0] - yield kv - - next = getattr(cur, "next") - while 1: - try: - kv = _DeadlockWrap(next) - key = kv[0] - yield kv - except _bsddb.DBCursorClosedError: - if self._kill_iteration: - raise RuntimeError('Database changed size ' - 'during iteration.') - cur = self._make_iter_cursor() - # FIXME-20031101-greg: race condition. cursor could - # be closed by another thread before this call. - _DeadlockWrap(cur.set, key,0,0,0) - next = getattr(cur, "next") - except _bsddb.DBNotFoundError: - pass - except _bsddb.DBCursorClosedError: - # the database was modified during iteration. abort. - pass -# When Python 2.4 not supported in bsddb3, we can change this to "finally" - except : - self._in_iter -= 1 - raise - - self._in_iter -= 1 - - -class _DBWithCursor(_iter_mixin): - """ - A simple wrapper around DB that makes it look like the bsddbobject in - the old module. It uses a cursor as needed to provide DB traversal. - """ - def __init__(self, db): - self.db = db - self.db.set_get_returns_none(0) - - # FIXME-20031101-greg: I believe there is still the potential - # for deadlocks in a multithreaded environment if someone - # attempts to use the any of the cursor interfaces in one - # thread while doing a put or delete in another thread. The - # reason is that _checkCursor and _closeCursors are not atomic - # operations. 
Doing our own locking around self.dbc, - # self.saved_dbc_key and self._cursor_refs could prevent this. - # TODO: A test case demonstrating the problem needs to be written. - - # self.dbc is a DBCursor object used to implement the - # first/next/previous/last/set_location methods. - self.dbc = None - self.saved_dbc_key = None - - # a collection of all DBCursor objects currently allocated - # by the _iter_mixin interface. - self._cursor_refs = {} - self._in_iter = 0 - self._kill_iteration = False - - def __del__(self): - self.close() - - def _checkCursor(self): - if self.dbc is None: - self.dbc = _DeadlockWrap(self.db.cursor) - if self.saved_dbc_key is not None: - _DeadlockWrap(self.dbc.set, self.saved_dbc_key) - self.saved_dbc_key = None - - # This method is needed for all non-cursor DB calls to avoid - # Berkeley DB deadlocks (due to being opened with DB_INIT_LOCK - # and DB_THREAD to be thread safe) when intermixing database - # operations that use the cursor internally with those that don't. 
- def _closeCursors(self, save=1): - if self.dbc: - c = self.dbc - self.dbc = None - if save: - try: - self.saved_dbc_key = _DeadlockWrap(c.current, 0,0,0)[0] - except db.DBError: - pass - _DeadlockWrap(c.close) - del c - for cref in self._cursor_refs.values(): - c = cref() - if c is not None: - _DeadlockWrap(c.close) - - def _checkOpen(self): - if self.db is None: - raise error, "BSDDB object has already been closed" - - def isOpen(self): - return self.db is not None - - def __len__(self): - self._checkOpen() - return _DeadlockWrap(lambda: len(self.db)) # len(self.db) - - if sys.version_info >= (2, 6) : - def __repr__(self) : - if self.isOpen() : - return repr(dict(_DeadlockWrap(self.db.items))) - return repr(dict()) - - def __getitem__(self, key): - self._checkOpen() - return _DeadlockWrap(lambda: self.db[key]) # self.db[key] - - def __setitem__(self, key, value): - self._checkOpen() - self._closeCursors() - if self._in_iter and key not in self: - self._kill_iteration = True - def wrapF(): - self.db[key] = value - _DeadlockWrap(wrapF) # self.db[key] = value - - def __delitem__(self, key): - self._checkOpen() - self._closeCursors() - if self._in_iter and key in self: - self._kill_iteration = True - def wrapF(): - del self.db[key] - _DeadlockWrap(wrapF) # del self.db[key] - - def close(self): - self._closeCursors(save=0) - if self.dbc is not None: - _DeadlockWrap(self.dbc.close) - v = 0 - if self.db is not None: - v = _DeadlockWrap(self.db.close) - self.dbc = None - self.db = None - return v - - def keys(self): - self._checkOpen() - return _DeadlockWrap(self.db.keys) - - def has_key(self, key): - self._checkOpen() - return _DeadlockWrap(self.db.has_key, key) - - def set_location(self, key): - self._checkOpen() - self._checkCursor() - return _DeadlockWrap(self.dbc.set_range, key) - - def next(self): # Renamed by "2to3" - self._checkOpen() - self._checkCursor() - rv = _DeadlockWrap(getattr(self.dbc, "next")) - return rv - - if sys.version_info[0] >= 3 : # For "2to3" 
conversion - next = __next__ - - def previous(self): - self._checkOpen() - self._checkCursor() - rv = _DeadlockWrap(self.dbc.prev) - return rv - - def first(self): - self._checkOpen() - # fix 1725856: don't needlessly try to restore our cursor position - self.saved_dbc_key = None - self._checkCursor() - rv = _DeadlockWrap(self.dbc.first) - return rv - - def last(self): - self._checkOpen() - # fix 1725856: don't needlessly try to restore our cursor position - self.saved_dbc_key = None - self._checkCursor() - rv = _DeadlockWrap(self.dbc.last) - return rv - - def sync(self): - self._checkOpen() - return _DeadlockWrap(self.db.sync) - - -#---------------------------------------------------------------------- -# Compatibility object factory functions - -def hashopen(file, flag='c', mode=0666, pgsize=None, ffactor=None, nelem=None, - cachesize=None, lorder=None, hflags=0): - - flags = _checkflag(flag, file) - e = _openDBEnv(cachesize) - d = db.DB(e) - d.set_flags(hflags) - if pgsize is not None: d.set_pagesize(pgsize) - if lorder is not None: d.set_lorder(lorder) - if ffactor is not None: d.set_h_ffactor(ffactor) - if nelem is not None: d.set_h_nelem(nelem) - d.open(file, db.DB_HASH, flags, mode) - return _DBWithCursor(d) - -#---------------------------------------------------------------------- - -def btopen(file, flag='c', mode=0666, - btflags=0, cachesize=None, maxkeypage=None, minkeypage=None, - pgsize=None, lorder=None): - - flags = _checkflag(flag, file) - e = _openDBEnv(cachesize) - d = db.DB(e) - if pgsize is not None: d.set_pagesize(pgsize) - if lorder is not None: d.set_lorder(lorder) - d.set_flags(btflags) - if minkeypage is not None: d.set_bt_minkey(minkeypage) - if maxkeypage is not None: d.set_bt_maxkey(maxkeypage) - d.open(file, db.DB_BTREE, flags, mode) - return _DBWithCursor(d) - -#---------------------------------------------------------------------- - - -def rnopen(file, flag='c', mode=0666, - rnflags=0, cachesize=None, pgsize=None, lorder=None, - 
rlen=None, delim=None, source=None, pad=None): - - flags = _checkflag(flag, file) - e = _openDBEnv(cachesize) - d = db.DB(e) - if pgsize is not None: d.set_pagesize(pgsize) - if lorder is not None: d.set_lorder(lorder) - d.set_flags(rnflags) - if delim is not None: d.set_re_delim(delim) - if rlen is not None: d.set_re_len(rlen) - if source is not None: d.set_re_source(source) - if pad is not None: d.set_re_pad(pad) - d.open(file, db.DB_RECNO, flags, mode) - return _DBWithCursor(d) - -#---------------------------------------------------------------------- - -def _openDBEnv(cachesize): - e = db.DBEnv() - if cachesize is not None: - if cachesize >= 20480: - e.set_cachesize(0, cachesize) - else: - raise error, "cachesize must be >= 20480" - e.set_lk_detect(db.DB_LOCK_DEFAULT) - e.open('.', db.DB_PRIVATE | db.DB_CREATE | db.DB_THREAD | db.DB_INIT_LOCK | db.DB_INIT_MPOOL) - return e - -def _checkflag(flag, file): - if flag == 'r': - flags = db.DB_RDONLY - elif flag == 'rw': - flags = 0 - elif flag == 'w': - flags = db.DB_CREATE - elif flag == 'c': - flags = db.DB_CREATE - elif flag == 'n': - flags = db.DB_CREATE - #flags = db.DB_CREATE | db.DB_TRUNCATE - # we used db.DB_TRUNCATE flag for this before but Berkeley DB - # 4.2.52 changed to disallowed truncate with txn environments. - if file is not None and os.path.isfile(file): - os.unlink(file) - else: - raise error, "flags should be one of 'r', 'w', 'c' or 'n'" - return flags | db.DB_THREAD - -#---------------------------------------------------------------------- - - -# This is a silly little hack that allows apps to continue to use the -# DB_THREAD flag even on systems without threads without freaking out -# Berkeley DB. -# -# This assumes that if Python was built with thread support then -# Berkeley DB was too. 
- -try: - # 2to3 automatically changes "import thread" to "import _thread" - import thread as T - del T - -except ImportError: - db.DB_THREAD = 0 - -#---------------------------------------------------------------------- diff --git a/python/Lib/bsddb/db.py b/python/Lib/bsddb/db.py deleted file mode 100755 index c3aee307ef..0000000000 --- a/python/Lib/bsddb/db.py +++ /dev/null @@ -1,60 +0,0 @@ -#---------------------------------------------------------------------- -# Copyright (c) 1999-2001, Digital Creations, Fredericksburg, VA, USA -# and Andrew Kuchling. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# o Redistributions of source code must retain the above copyright -# notice, this list of conditions, and the disclaimer that follows. -# -# o Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions, and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# o Neither the name of Digital Creations nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS AND CONTRIBUTORS *AS -# IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -# PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL DIGITAL -# CREATIONS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -# DAMAGE. -#---------------------------------------------------------------------- - - -# This module is just a placeholder for possible future expansion, in -# case we ever want to augment the stuff in _db in any way. For now -# it just simply imports everything from _db. - -import sys -absolute_import = (sys.version_info[0] >= 3) - -if not absolute_import : - if __name__.startswith('bsddb3.') : - # import _pybsddb binary as it should be the more recent version from - # a standalone pybsddb addon package than the version included with - # python as bsddb._bsddb. - from _pybsddb import * - from _pybsddb import __version__ - else: - from _bsddb import * - from _bsddb import __version__ -else : - # Because this syntaxis is not valid before Python 2.5 - if __name__.startswith('bsddb3.') : - exec("from ._pybsddb import *") - exec("from ._pybsddb import __version__") - else : - exec("from ._bsddb import *") - exec("from ._bsddb import __version__") diff --git a/python/Lib/bsddb/dbobj.py b/python/Lib/bsddb/dbobj.py deleted file mode 100755 index 1400fe15d2..0000000000 --- a/python/Lib/bsddb/dbobj.py +++ /dev/null @@ -1,266 +0,0 @@ -#------------------------------------------------------------------------- -# This file contains real Python object wrappers for DB and DBEnv -# C "objects" that can be usefully subclassed. The previous SWIG -# based interface allowed this thanks to SWIG's shadow classes. -# -- Gregory P. 
Smith -#------------------------------------------------------------------------- -# -# (C) Copyright 2001 Autonomous Zone Industries -# -# License: This is free software. You may use this software for any -# purpose including modification/redistribution, so long as -# this header remains intact and that you do not claim any -# rights of ownership or authorship of this software. This -# software has been tested, but no warranty is expressed or -# implied. -# - -# -# TODO it would be *really nice* to have an automatic shadow class populator -# so that new methods don't need to be added here manually after being -# added to _bsddb.c. -# - -import sys -absolute_import = (sys.version_info[0] >= 3) -if absolute_import : - # Because this syntaxis is not valid before Python 2.5 - exec("from . import db") -else : - import db - -if sys.version_info < (2, 6) : - from UserDict import DictMixin as MutableMapping -else : - import collections - MutableMapping = collections.MutableMapping - -class DBEnv: - def __init__(self, *args, **kwargs): - self._cobj = db.DBEnv(*args, **kwargs) - - def close(self, *args, **kwargs): - return self._cobj.close(*args, **kwargs) - def open(self, *args, **kwargs): - return self._cobj.open(*args, **kwargs) - def remove(self, *args, **kwargs): - return self._cobj.remove(*args, **kwargs) - def set_shm_key(self, *args, **kwargs): - return self._cobj.set_shm_key(*args, **kwargs) - def set_cachesize(self, *args, **kwargs): - return self._cobj.set_cachesize(*args, **kwargs) - def set_data_dir(self, *args, **kwargs): - return self._cobj.set_data_dir(*args, **kwargs) - def set_flags(self, *args, **kwargs): - return self._cobj.set_flags(*args, **kwargs) - def set_lg_bsize(self, *args, **kwargs): - return self._cobj.set_lg_bsize(*args, **kwargs) - def set_lg_dir(self, *args, **kwargs): - return self._cobj.set_lg_dir(*args, **kwargs) - def set_lg_max(self, *args, **kwargs): - return self._cobj.set_lg_max(*args, **kwargs) - def set_lk_detect(self, *args, 
**kwargs): - return self._cobj.set_lk_detect(*args, **kwargs) - if db.version() < (4,5): - def set_lk_max(self, *args, **kwargs): - return self._cobj.set_lk_max(*args, **kwargs) - def set_lk_max_locks(self, *args, **kwargs): - return self._cobj.set_lk_max_locks(*args, **kwargs) - def set_lk_max_lockers(self, *args, **kwargs): - return self._cobj.set_lk_max_lockers(*args, **kwargs) - def set_lk_max_objects(self, *args, **kwargs): - return self._cobj.set_lk_max_objects(*args, **kwargs) - def set_mp_mmapsize(self, *args, **kwargs): - return self._cobj.set_mp_mmapsize(*args, **kwargs) - def set_timeout(self, *args, **kwargs): - return self._cobj.set_timeout(*args, **kwargs) - def set_tmp_dir(self, *args, **kwargs): - return self._cobj.set_tmp_dir(*args, **kwargs) - def txn_begin(self, *args, **kwargs): - return self._cobj.txn_begin(*args, **kwargs) - def txn_checkpoint(self, *args, **kwargs): - return self._cobj.txn_checkpoint(*args, **kwargs) - def txn_stat(self, *args, **kwargs): - return self._cobj.txn_stat(*args, **kwargs) - def set_tx_max(self, *args, **kwargs): - return self._cobj.set_tx_max(*args, **kwargs) - def set_tx_timestamp(self, *args, **kwargs): - return self._cobj.set_tx_timestamp(*args, **kwargs) - def lock_detect(self, *args, **kwargs): - return self._cobj.lock_detect(*args, **kwargs) - def lock_get(self, *args, **kwargs): - return self._cobj.lock_get(*args, **kwargs) - def lock_id(self, *args, **kwargs): - return self._cobj.lock_id(*args, **kwargs) - def lock_put(self, *args, **kwargs): - return self._cobj.lock_put(*args, **kwargs) - def lock_stat(self, *args, **kwargs): - return self._cobj.lock_stat(*args, **kwargs) - def log_archive(self, *args, **kwargs): - return self._cobj.log_archive(*args, **kwargs) - - def set_get_returns_none(self, *args, **kwargs): - return self._cobj.set_get_returns_none(*args, **kwargs) - - def log_stat(self, *args, **kwargs): - return self._cobj.log_stat(*args, **kwargs) - - def dbremove(self, *args, **kwargs): - return 
self._cobj.dbremove(*args, **kwargs) - def dbrename(self, *args, **kwargs): - return self._cobj.dbrename(*args, **kwargs) - def set_encrypt(self, *args, **kwargs): - return self._cobj.set_encrypt(*args, **kwargs) - - if db.version() >= (4,4): - def fileid_reset(self, *args, **kwargs): - return self._cobj.fileid_reset(*args, **kwargs) - - def lsn_reset(self, *args, **kwargs): - return self._cobj.lsn_reset(*args, **kwargs) - - -class DB(MutableMapping): - def __init__(self, dbenv, *args, **kwargs): - # give it the proper DBEnv C object that its expecting - self._cobj = db.DB(*((dbenv._cobj,) + args), **kwargs) - - # TODO are there other dict methods that need to be overridden? - def __len__(self): - return len(self._cobj) - def __getitem__(self, arg): - return self._cobj[arg] - def __setitem__(self, key, value): - self._cobj[key] = value - def __delitem__(self, arg): - del self._cobj[arg] - - if sys.version_info >= (2, 6) : - def __iter__(self) : - return self._cobj.__iter__() - - def append(self, *args, **kwargs): - return self._cobj.append(*args, **kwargs) - def associate(self, *args, **kwargs): - return self._cobj.associate(*args, **kwargs) - def close(self, *args, **kwargs): - return self._cobj.close(*args, **kwargs) - def consume(self, *args, **kwargs): - return self._cobj.consume(*args, **kwargs) - def consume_wait(self, *args, **kwargs): - return self._cobj.consume_wait(*args, **kwargs) - def cursor(self, *args, **kwargs): - return self._cobj.cursor(*args, **kwargs) - def delete(self, *args, **kwargs): - return self._cobj.delete(*args, **kwargs) - def fd(self, *args, **kwargs): - return self._cobj.fd(*args, **kwargs) - def get(self, *args, **kwargs): - return self._cobj.get(*args, **kwargs) - def pget(self, *args, **kwargs): - return self._cobj.pget(*args, **kwargs) - def get_both(self, *args, **kwargs): - return self._cobj.get_both(*args, **kwargs) - def get_byteswapped(self, *args, **kwargs): - return self._cobj.get_byteswapped(*args, **kwargs) - def 
get_size(self, *args, **kwargs): - return self._cobj.get_size(*args, **kwargs) - def get_type(self, *args, **kwargs): - return self._cobj.get_type(*args, **kwargs) - def join(self, *args, **kwargs): - return self._cobj.join(*args, **kwargs) - def key_range(self, *args, **kwargs): - return self._cobj.key_range(*args, **kwargs) - def has_key(self, *args, **kwargs): - return self._cobj.has_key(*args, **kwargs) - def items(self, *args, **kwargs): - return self._cobj.items(*args, **kwargs) - def keys(self, *args, **kwargs): - return self._cobj.keys(*args, **kwargs) - def open(self, *args, **kwargs): - return self._cobj.open(*args, **kwargs) - def put(self, *args, **kwargs): - return self._cobj.put(*args, **kwargs) - def remove(self, *args, **kwargs): - return self._cobj.remove(*args, **kwargs) - def rename(self, *args, **kwargs): - return self._cobj.rename(*args, **kwargs) - def set_bt_minkey(self, *args, **kwargs): - return self._cobj.set_bt_minkey(*args, **kwargs) - def set_bt_compare(self, *args, **kwargs): - return self._cobj.set_bt_compare(*args, **kwargs) - def set_cachesize(self, *args, **kwargs): - return self._cobj.set_cachesize(*args, **kwargs) - def set_dup_compare(self, *args, **kwargs) : - return self._cobj.set_dup_compare(*args, **kwargs) - def set_flags(self, *args, **kwargs): - return self._cobj.set_flags(*args, **kwargs) - def set_h_ffactor(self, *args, **kwargs): - return self._cobj.set_h_ffactor(*args, **kwargs) - def set_h_nelem(self, *args, **kwargs): - return self._cobj.set_h_nelem(*args, **kwargs) - def set_lorder(self, *args, **kwargs): - return self._cobj.set_lorder(*args, **kwargs) - def set_pagesize(self, *args, **kwargs): - return self._cobj.set_pagesize(*args, **kwargs) - def set_re_delim(self, *args, **kwargs): - return self._cobj.set_re_delim(*args, **kwargs) - def set_re_len(self, *args, **kwargs): - return self._cobj.set_re_len(*args, **kwargs) - def set_re_pad(self, *args, **kwargs): - return self._cobj.set_re_pad(*args, **kwargs) - def 
set_re_source(self, *args, **kwargs): - return self._cobj.set_re_source(*args, **kwargs) - def set_q_extentsize(self, *args, **kwargs): - return self._cobj.set_q_extentsize(*args, **kwargs) - def stat(self, *args, **kwargs): - return self._cobj.stat(*args, **kwargs) - def sync(self, *args, **kwargs): - return self._cobj.sync(*args, **kwargs) - def type(self, *args, **kwargs): - return self._cobj.type(*args, **kwargs) - def upgrade(self, *args, **kwargs): - return self._cobj.upgrade(*args, **kwargs) - def values(self, *args, **kwargs): - return self._cobj.values(*args, **kwargs) - def verify(self, *args, **kwargs): - return self._cobj.verify(*args, **kwargs) - def set_get_returns_none(self, *args, **kwargs): - return self._cobj.set_get_returns_none(*args, **kwargs) - - def set_encrypt(self, *args, **kwargs): - return self._cobj.set_encrypt(*args, **kwargs) - - -class DBSequence: - def __init__(self, *args, **kwargs): - self._cobj = db.DBSequence(*args, **kwargs) - - def close(self, *args, **kwargs): - return self._cobj.close(*args, **kwargs) - def get(self, *args, **kwargs): - return self._cobj.get(*args, **kwargs) - def get_dbp(self, *args, **kwargs): - return self._cobj.get_dbp(*args, **kwargs) - def get_key(self, *args, **kwargs): - return self._cobj.get_key(*args, **kwargs) - def init_value(self, *args, **kwargs): - return self._cobj.init_value(*args, **kwargs) - def open(self, *args, **kwargs): - return self._cobj.open(*args, **kwargs) - def remove(self, *args, **kwargs): - return self._cobj.remove(*args, **kwargs) - def stat(self, *args, **kwargs): - return self._cobj.stat(*args, **kwargs) - def set_cachesize(self, *args, **kwargs): - return self._cobj.set_cachesize(*args, **kwargs) - def set_flags(self, *args, **kwargs): - return self._cobj.set_flags(*args, **kwargs) - def set_range(self, *args, **kwargs): - return self._cobj.set_range(*args, **kwargs) - def get_cachesize(self, *args, **kwargs): - return self._cobj.get_cachesize(*args, **kwargs) - def 
get_flags(self, *args, **kwargs): - return self._cobj.get_flags(*args, **kwargs) - def get_range(self, *args, **kwargs): - return self._cobj.get_range(*args, **kwargs) diff --git a/python/Lib/bsddb/dbrecio.py b/python/Lib/bsddb/dbrecio.py deleted file mode 100755 index d439f3255e..0000000000 --- a/python/Lib/bsddb/dbrecio.py +++ /dev/null @@ -1,190 +0,0 @@ - -""" -File-like objects that read from or write to a bsddb record. - -This implements (nearly) all stdio methods. - -f = DBRecIO(db, key, txn=None) -f.close() # explicitly release resources held -flag = f.isatty() # always false -pos = f.tell() # get current position -f.seek(pos) # set current position -f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF -buf = f.read() # read until EOF -buf = f.read(n) # read up to n bytes -f.truncate([size]) # truncate file at to at most size (default: current pos) -f.write(buf) # write at current position -f.writelines(list) # for line in list: f.write(line) - -Notes: -- fileno() is left unimplemented so that code which uses it triggers - an exception early. -- There's a simple test set (see end of this file) - not yet updated - for DBRecIO. -- readline() is not implemented yet. 
- - -From: - Itamar Shtull-Trauring -""" - -import errno -import string - -class DBRecIO: - def __init__(self, db, key, txn=None): - self.db = db - self.key = key - self.txn = txn - self.len = None - self.pos = 0 - self.closed = 0 - self.softspace = 0 - - def close(self): - if not self.closed: - self.closed = 1 - del self.db, self.txn - - def isatty(self): - if self.closed: - raise ValueError, "I/O operation on closed file" - return 0 - - def seek(self, pos, mode = 0): - if self.closed: - raise ValueError, "I/O operation on closed file" - if mode == 1: - pos = pos + self.pos - elif mode == 2: - pos = pos + self.len - self.pos = max(0, pos) - - def tell(self): - if self.closed: - raise ValueError, "I/O operation on closed file" - return self.pos - - def read(self, n = -1): - if self.closed: - raise ValueError, "I/O operation on closed file" - if n < 0: - newpos = self.len - else: - newpos = min(self.pos+n, self.len) - - dlen = newpos - self.pos - - r = self.db.get(self.key, txn=self.txn, dlen=dlen, doff=self.pos) - self.pos = newpos - return r - - __fixme = """ - def readline(self, length=None): - if self.closed: - raise ValueError, "I/O operation on closed file" - if self.buflist: - self.buf = self.buf + string.joinfields(self.buflist, '') - self.buflist = [] - i = string.find(self.buf, '\n', self.pos) - if i < 0: - newpos = self.len - else: - newpos = i+1 - if length is not None: - if self.pos + length < newpos: - newpos = self.pos + length - r = self.buf[self.pos:newpos] - self.pos = newpos - return r - - def readlines(self, sizehint = 0): - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - """ - - def truncate(self, size=None): - if self.closed: - raise ValueError, "I/O operation on closed file" - if size is None: - size = self.pos - elif size < 0: - raise IOError(errno.EINVAL, - "Negative size not allowed") - elif size < self.pos: 
- self.pos = size - self.db.put(self.key, "", txn=self.txn, dlen=self.len-size, doff=size) - - def write(self, s): - if self.closed: - raise ValueError, "I/O operation on closed file" - if not s: return - if self.pos > self.len: - self.buflist.append('\0'*(self.pos - self.len)) - self.len = self.pos - newpos = self.pos + len(s) - self.db.put(self.key, s, txn=self.txn, dlen=len(s), doff=self.pos) - self.pos = newpos - - def writelines(self, list): - self.write(string.joinfields(list, '')) - - def flush(self): - if self.closed: - raise ValueError, "I/O operation on closed file" - - -""" -# A little test suite - -def _test(): - import sys - if sys.argv[1:]: - file = sys.argv[1] - else: - file = '/etc/passwd' - lines = open(file, 'r').readlines() - text = open(file, 'r').read() - f = StringIO() - for line in lines[:-2]: - f.write(line) - f.writelines(lines[-2:]) - if f.getvalue() != text: - raise RuntimeError, 'write failed' - length = f.tell() - print 'File length =', length - f.seek(len(lines[0])) - f.write(lines[1]) - f.seek(0) - print 'First line =', repr(f.readline()) - here = f.tell() - line = f.readline() - print 'Second line =', repr(line) - f.seek(-len(line), 1) - line2 = f.read(len(line)) - if line != line2: - raise RuntimeError, 'bad result after seek back' - f.seek(len(line2), 1) - list = f.readlines() - line = list[-1] - f.seek(f.tell() - len(line)) - line2 = f.read() - if line != line2: - raise RuntimeError, 'bad result after seek back from EOF' - print 'Read', len(list), 'more lines' - print 'File length =', f.tell() - if f.tell() != length: - raise RuntimeError, 'bad length' - f.close() - -if __name__ == '__main__': - _test() -""" diff --git a/python/Lib/bsddb/dbshelve.py b/python/Lib/bsddb/dbshelve.py deleted file mode 100755 index 7d0daa2f25..0000000000 --- a/python/Lib/bsddb/dbshelve.py +++ /dev/null @@ -1,381 +0,0 @@ -#------------------------------------------------------------------------ -# Copyright (c) 1997-2001 by Total Control Software -# All 
Rights Reserved -#------------------------------------------------------------------------ -# -# Module Name: dbShelve.py -# -# Description: A reimplementation of the standard shelve.py that -# forces the use of cPickle, and DB. -# -# Creation Date: 11/3/97 3:39:04PM -# -# License: This is free software. You may use this software for any -# purpose including modification/redistribution, so long as -# this header remains intact and that you do not claim any -# rights of ownership or authorship of this software. This -# software has been tested, but no warranty is expressed or -# implied. -# -# 13-Dec-2000: Updated to be used with the new bsddb3 package. -# Added DBShelfCursor class. -# -#------------------------------------------------------------------------ - -"""Manage shelves of pickled objects using bsddb database files for the -storage. -""" - -#------------------------------------------------------------------------ - -import sys -absolute_import = (sys.version_info[0] >= 3) -if absolute_import : - # Because this syntaxis is not valid before Python 2.5 - exec("from . import db") -else : - import db - -if sys.version_info[0] >= 3 : - import cPickle # Will be converted to "pickle" by "2to3" -else : - if sys.version_info < (2, 6) : - import cPickle - else : - # When we drop support for python 2.4 - # we could use: (in 2.5 we need a __future__ statement) - # - # with warnings.catch_warnings(): - # warnings.filterwarnings(...) - # ... - # - # We can not use "with" as is, because it would be invalid syntax - # in python 2.4 and (with no __future__) 2.5. 
- # Here we simulate "with" following PEP 343 : - import warnings - w = warnings.catch_warnings() - w.__enter__() - try : - warnings.filterwarnings('ignore', - message='the cPickle module has been removed in Python 3.0', - category=DeprecationWarning) - import cPickle - finally : - w.__exit__() - del w - -HIGHEST_PROTOCOL = cPickle.HIGHEST_PROTOCOL -def _dumps(object, protocol): - return cPickle.dumps(object, protocol=protocol) - -if sys.version_info < (2, 6) : - from UserDict import DictMixin as MutableMapping -else : - import collections - MutableMapping = collections.MutableMapping - -#------------------------------------------------------------------------ - - -def open(filename, flags=db.DB_CREATE, mode=0660, filetype=db.DB_HASH, - dbenv=None, dbname=None): - """ - A simple factory function for compatibility with the standard - shleve.py module. It can be used like this, where key is a string - and data is a pickleable object: - - from bsddb import dbshelve - db = dbshelve.open(filename) - - db[key] = data - - db.close() - """ - if type(flags) == type(''): - sflag = flags - if sflag == 'r': - flags = db.DB_RDONLY - elif sflag == 'rw': - flags = 0 - elif sflag == 'w': - flags = db.DB_CREATE - elif sflag == 'c': - flags = db.DB_CREATE - elif sflag == 'n': - flags = db.DB_TRUNCATE | db.DB_CREATE - else: - raise db.DBError, "flags should be one of 'r', 'w', 'c' or 'n' or use the bsddb.db.DB_* flags" - - d = DBShelf(dbenv) - d.open(filename, dbname, filetype, flags, mode) - return d - -#--------------------------------------------------------------------------- - -class DBShelveError(db.DBError): pass - - -class DBShelf(MutableMapping): - """A shelf to hold pickled objects, built upon a bsddb DB object. It - automatically pickles/unpickles data objects going to/from the DB. 
- """ - def __init__(self, dbenv=None): - self.db = db.DB(dbenv) - self._closed = True - if HIGHEST_PROTOCOL: - self.protocol = HIGHEST_PROTOCOL - else: - self.protocol = 1 - - - def __del__(self): - self.close() - - - def __getattr__(self, name): - """Many methods we can just pass through to the DB object. - (See below) - """ - return getattr(self.db, name) - - - #----------------------------------- - # Dictionary access methods - - def __len__(self): - return len(self.db) - - - def __getitem__(self, key): - data = self.db[key] - return cPickle.loads(data) - - - def __setitem__(self, key, value): - data = _dumps(value, self.protocol) - self.db[key] = data - - - def __delitem__(self, key): - del self.db[key] - - - def keys(self, txn=None): - if txn is not None: - return self.db.keys(txn) - else: - return self.db.keys() - - if sys.version_info >= (2, 6) : - def __iter__(self) : # XXX: Load all keys in memory :-( - for k in self.db.keys() : - yield k - - # Do this when "DB" support iteration - # Or is it enough to pass thru "getattr"? 
- # - # def __iter__(self) : - # return self.db.__iter__() - - - def open(self, *args, **kwargs): - self.db.open(*args, **kwargs) - self._closed = False - - - def close(self, *args, **kwargs): - self.db.close(*args, **kwargs) - self._closed = True - - - def __repr__(self): - if self._closed: - return '' % (id(self)) - else: - return repr(dict(self.iteritems())) - - - def items(self, txn=None): - if txn is not None: - items = self.db.items(txn) - else: - items = self.db.items() - newitems = [] - - for k, v in items: - newitems.append( (k, cPickle.loads(v)) ) - return newitems - - def values(self, txn=None): - if txn is not None: - values = self.db.values(txn) - else: - values = self.db.values() - - return map(cPickle.loads, values) - - #----------------------------------- - # Other methods - - def __append(self, value, txn=None): - data = _dumps(value, self.protocol) - return self.db.append(data, txn) - - def append(self, value, txn=None): - if self.get_type() == db.DB_RECNO: - return self.__append(value, txn=txn) - raise DBShelveError, "append() only supported when dbshelve opened with filetype=dbshelve.db.DB_RECNO" - - - def associate(self, secondaryDB, callback, flags=0): - def _shelf_callback(priKey, priData, realCallback=callback): - # Safe in Python 2.x because expresion short circuit - if sys.version_info[0] < 3 or isinstance(priData, bytes) : - data = cPickle.loads(priData) - else : - data = cPickle.loads(bytes(priData, "iso8859-1")) # 8 bits - return realCallback(priKey, data) - - return self.db.associate(secondaryDB, _shelf_callback, flags) - - - #def get(self, key, default=None, txn=None, flags=0): - def get(self, *args, **kw): - # We do it with *args and **kw so if the default value wasn't - # given nothing is passed to the extension module. That way - # an exception can be raised if set_get_returns_none is turned - # off. 
- data = self.db.get(*args, **kw) - try: - return cPickle.loads(data) - except (EOFError, TypeError, cPickle.UnpicklingError): - return data # we may be getting the default value, or None, - # so it doesn't need unpickled. - - def get_both(self, key, value, txn=None, flags=0): - data = _dumps(value, self.protocol) - data = self.db.get(key, data, txn, flags) - return cPickle.loads(data) - - - def cursor(self, txn=None, flags=0): - c = DBShelfCursor(self.db.cursor(txn, flags)) - c.protocol = self.protocol - return c - - - def put(self, key, value, txn=None, flags=0): - data = _dumps(value, self.protocol) - return self.db.put(key, data, txn, flags) - - - def join(self, cursorList, flags=0): - raise NotImplementedError - - - #---------------------------------------------- - # Methods allowed to pass-through to self.db - # - # close, delete, fd, get_byteswapped, get_type, has_key, - # key_range, open, remove, rename, stat, sync, - # upgrade, verify, and all set_* methods. - - -#--------------------------------------------------------------------------- - -class DBShelfCursor: - """ - """ - def __init__(self, cursor): - self.dbc = cursor - - def __del__(self): - self.close() - - - def __getattr__(self, name): - """Some methods we can just pass through to the cursor object. 
(See below)""" - return getattr(self.dbc, name) - - - #---------------------------------------------- - - def dup(self, flags=0): - c = DBShelfCursor(self.dbc.dup(flags)) - c.protocol = self.protocol - return c - - - def put(self, key, value, flags=0): - data = _dumps(value, self.protocol) - return self.dbc.put(key, data, flags) - - - def get(self, *args): - count = len(args) # a method overloading hack - method = getattr(self, 'get_%d' % count) - method(*args) - - def get_1(self, flags): - rec = self.dbc.get(flags) - return self._extract(rec) - - def get_2(self, key, flags): - rec = self.dbc.get(key, flags) - return self._extract(rec) - - def get_3(self, key, value, flags): - data = _dumps(value, self.protocol) - rec = self.dbc.get(key, flags) - return self._extract(rec) - - - def current(self, flags=0): return self.get_1(flags|db.DB_CURRENT) - def first(self, flags=0): return self.get_1(flags|db.DB_FIRST) - def last(self, flags=0): return self.get_1(flags|db.DB_LAST) - def next(self, flags=0): return self.get_1(flags|db.DB_NEXT) - def prev(self, flags=0): return self.get_1(flags|db.DB_PREV) - def consume(self, flags=0): return self.get_1(flags|db.DB_CONSUME) - def next_dup(self, flags=0): return self.get_1(flags|db.DB_NEXT_DUP) - def next_nodup(self, flags=0): return self.get_1(flags|db.DB_NEXT_NODUP) - def prev_nodup(self, flags=0): return self.get_1(flags|db.DB_PREV_NODUP) - - - def get_both(self, key, value, flags=0): - data = _dumps(value, self.protocol) - rec = self.dbc.get_both(key, flags) - return self._extract(rec) - - - def set(self, key, flags=0): - rec = self.dbc.set(key, flags) - return self._extract(rec) - - def set_range(self, key, flags=0): - rec = self.dbc.set_range(key, flags) - return self._extract(rec) - - def set_recno(self, recno, flags=0): - rec = self.dbc.set_recno(recno, flags) - return self._extract(rec) - - set_both = get_both - - def _extract(self, rec): - if rec is None: - return None - else: - key, data = rec - # Safe in Python 2.x 
because expresion short circuit - if sys.version_info[0] < 3 or isinstance(data, bytes) : - return key, cPickle.loads(data) - else : - return key, cPickle.loads(bytes(data, "iso8859-1")) # 8 bits - - #---------------------------------------------- - # Methods allowed to pass-through to self.dbc - # - # close, count, delete, get_recno, join_item - - -#--------------------------------------------------------------------------- diff --git a/python/Lib/bsddb/dbtables.py b/python/Lib/bsddb/dbtables.py deleted file mode 100755 index e8acdd0055..0000000000 --- a/python/Lib/bsddb/dbtables.py +++ /dev/null @@ -1,843 +0,0 @@ -#----------------------------------------------------------------------- -# -# Copyright (C) 2000, 2001 by Autonomous Zone Industries -# Copyright (C) 2002 Gregory P. Smith -# -# License: This is free software. You may use this software for any -# purpose including modification/redistribution, so long as -# this header remains intact and that you do not claim any -# rights of ownership or authorship of this software. This -# software has been tested, but no warranty is expressed or -# implied. -# -# -- Gregory P. Smith - -# This provides a simple database table interface built on top of -# the Python Berkeley DB 3 interface. -# -_cvsid = '$Id$' - -import re -import sys -import copy -import random -import struct - - -if sys.version_info[0] >= 3 : - import pickle -else : - if sys.version_info < (2, 6) : - import cPickle as pickle - else : - # When we drop support for python 2.4 - # we could use: (in 2.5 we need a __future__ statement) - # - # with warnings.catch_warnings(): - # warnings.filterwarnings(...) - # ... - # - # We can not use "with" as is, because it would be invalid syntax - # in python 2.4 and (with no __future__) 2.5. 
- # Here we simulate "with" following PEP 343 : - import warnings - w = warnings.catch_warnings() - w.__enter__() - try : - warnings.filterwarnings('ignore', - message='the cPickle module has been removed in Python 3.0', - category=DeprecationWarning) - import cPickle as pickle - finally : - w.__exit__() - del w - -try: - # For Pythons w/distutils pybsddb - from bsddb3 import db -except ImportError: - # For Python 2.3 - from bsddb import db - -class TableDBError(StandardError): - pass -class TableAlreadyExists(TableDBError): - pass - - -class Cond: - """This condition matches everything""" - def __call__(self, s): - return 1 - -class ExactCond(Cond): - """Acts as an exact match condition function""" - def __init__(self, strtomatch): - self.strtomatch = strtomatch - def __call__(self, s): - return s == self.strtomatch - -class PrefixCond(Cond): - """Acts as a condition function for matching a string prefix""" - def __init__(self, prefix): - self.prefix = prefix - def __call__(self, s): - return s[:len(self.prefix)] == self.prefix - -class PostfixCond(Cond): - """Acts as a condition function for matching a string postfix""" - def __init__(self, postfix): - self.postfix = postfix - def __call__(self, s): - return s[-len(self.postfix):] == self.postfix - -class LikeCond(Cond): - """ - Acts as a function that will match using an SQL 'LIKE' style - string. Case insensitive and % signs are wild cards. - This isn't perfect but it should work for the simple common cases. - """ - def __init__(self, likestr, re_flags=re.IGNORECASE): - # escape python re characters - chars_to_escape = '.*+()[]?' 
- for char in chars_to_escape : - likestr = likestr.replace(char, '\\'+char) - # convert %s to wildcards - self.likestr = likestr.replace('%', '.*') - self.re = re.compile('^'+self.likestr+'$', re_flags) - def __call__(self, s): - return self.re.match(s) - -# -# keys used to store database metadata -# -_table_names_key = '__TABLE_NAMES__' # list of the tables in this db -_columns = '._COLUMNS__' # table_name+this key contains a list of columns - -def _columns_key(table): - return table + _columns - -# -# these keys are found within table sub databases -# -_data = '._DATA_.' # this+column+this+rowid key contains table data -_rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each - # row in the table. (no data is stored) -_rowid_str_len = 8 # length in bytes of the unique rowid strings - - -def _data_key(table, col, rowid): - return table + _data + col + _data + rowid - -def _search_col_data_key(table, col): - return table + _data + col + _data - -def _search_all_data_key(table): - return table + _data - -def _rowid_key(table, rowid): - return table + _rowid + rowid + _rowid - -def _search_rowid_key(table): - return table + _rowid - -def contains_metastrings(s) : - """Verify that the given string does not contain any - metadata strings that might interfere with dbtables database operation. - """ - if (s.find(_table_names_key) >= 0 or - s.find(_columns) >= 0 or - s.find(_data) >= 0 or - s.find(_rowid) >= 0): - # Then - return 1 - else: - return 0 - - -class bsdTableDB : - def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, - recover=0, dbflags=0): - """bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600) - - Open database name in the dbhome Berkeley DB directory. - Use keyword arguments when calling this constructor. 
- """ - self.db = None - myflags = db.DB_THREAD - if create: - myflags |= db.DB_CREATE - flagsforenv = (db.DB_INIT_MPOOL | db.DB_INIT_LOCK | db.DB_INIT_LOG | - db.DB_INIT_TXN | dbflags) - # DB_AUTO_COMMIT isn't a valid flag for env.open() - try: - dbflags |= db.DB_AUTO_COMMIT - except AttributeError: - pass - if recover: - flagsforenv = flagsforenv | db.DB_RECOVER - self.env = db.DBEnv() - # enable auto deadlock avoidance - self.env.set_lk_detect(db.DB_LOCK_DEFAULT) - self.env.open(dbhome, myflags | flagsforenv) - if truncate: - myflags |= db.DB_TRUNCATE - self.db = db.DB(self.env) - # this code relies on DBCursor.set* methods to raise exceptions - # rather than returning None - self.db.set_get_returns_none(1) - # allow duplicate entries [warning: be careful w/ metadata] - self.db.set_flags(db.DB_DUP) - self.db.open(filename, db.DB_BTREE, dbflags | myflags, mode) - self.dbfilename = filename - - if sys.version_info[0] >= 3 : - class cursor_py3k(object) : - def __init__(self, dbcursor) : - self._dbcursor = dbcursor - - def close(self) : - return self._dbcursor.close() - - def set_range(self, search) : - v = self._dbcursor.set_range(bytes(search, "iso8859-1")) - if v is not None : - v = (v[0].decode("iso8859-1"), - v[1].decode("iso8859-1")) - return v - - def __next__(self) : - v = getattr(self._dbcursor, "next")() - if v is not None : - v = (v[0].decode("iso8859-1"), - v[1].decode("iso8859-1")) - return v - - class db_py3k(object) : - def __init__(self, db) : - self._db = db - - def cursor(self, txn=None) : - return cursor_py3k(self._db.cursor(txn=txn)) - - def has_key(self, key, txn=None) : - return getattr(self._db,"has_key")(bytes(key, "iso8859-1"), - txn=txn) - - def put(self, key, value, flags=0, txn=None) : - key = bytes(key, "iso8859-1") - if value is not None : - value = bytes(value, "iso8859-1") - return self._db.put(key, value, flags=flags, txn=txn) - - def put_bytes(self, key, value, txn=None) : - key = bytes(key, "iso8859-1") - return self._db.put(key, 
value, txn=txn) - - def get(self, key, txn=None, flags=0) : - key = bytes(key, "iso8859-1") - v = self._db.get(key, txn=txn, flags=flags) - if v is not None : - v = v.decode("iso8859-1") - return v - - def get_bytes(self, key, txn=None, flags=0) : - key = bytes(key, "iso8859-1") - return self._db.get(key, txn=txn, flags=flags) - - def delete(self, key, txn=None) : - key = bytes(key, "iso8859-1") - return self._db.delete(key, txn=txn) - - def close (self) : - return self._db.close() - - self.db = db_py3k(self.db) - else : # Python 2.x - pass - - # Initialize the table names list if this is a new database - txn = self.env.txn_begin() - try: - if not getattr(self.db, "has_key")(_table_names_key, txn): - getattr(self.db, "put_bytes", self.db.put) \ - (_table_names_key, pickle.dumps([], 1), txn=txn) - # Yes, bare except - except: - txn.abort() - raise - else: - txn.commit() - # TODO verify more of the database's metadata? - self.__tablecolumns = {} - - def __del__(self): - self.close() - - def close(self): - if self.db is not None: - self.db.close() - self.db = None - if self.env is not None: - self.env.close() - self.env = None - - def checkpoint(self, mins=0): - self.env.txn_checkpoint(mins) - - def sync(self): - self.db.sync() - - def _db_print(self) : - """Print the database to stdout for debugging""" - print "******** Printing raw database for debugging ********" - cur = self.db.cursor() - try: - key, data = cur.first() - while 1: - print repr({key: data}) - next = cur.next() - if next: - key, data = next - else: - cur.close() - return - except db.DBNotFoundError: - cur.close() - - - def CreateTable(self, table, columns): - """CreateTable(table, columns) - Create a new table in the database. - - raises TableDBError if it already exists or for other DB errors. - """ - assert isinstance(columns, list) - - txn = None - try: - # checking sanity of the table and column names here on - # table creation will prevent problems elsewhere. 
- if contains_metastrings(table): - raise ValueError( - "bad table name: contains reserved metastrings") - for column in columns : - if contains_metastrings(column): - raise ValueError( - "bad column name: contains reserved metastrings") - - columnlist_key = _columns_key(table) - if getattr(self.db, "has_key")(columnlist_key): - raise TableAlreadyExists, "table already exists" - - txn = self.env.txn_begin() - # store the table's column info - getattr(self.db, "put_bytes", self.db.put)(columnlist_key, - pickle.dumps(columns, 1), txn=txn) - - # add the table name to the tablelist - tablelist = pickle.loads(getattr(self.db, "get_bytes", - self.db.get) (_table_names_key, txn=txn, flags=db.DB_RMW)) - tablelist.append(table) - # delete 1st, in case we opened with DB_DUP - self.db.delete(_table_names_key, txn=txn) - getattr(self.db, "put_bytes", self.db.put)(_table_names_key, - pickle.dumps(tablelist, 1), txn=txn) - - txn.commit() - txn = None - except db.DBError, dberror: - if txn: - txn.abort() - if sys.version_info < (2, 6) : - raise TableDBError, dberror[1] - else : - raise TableDBError, dberror.args[1] - - - def ListTableColumns(self, table): - """Return a list of columns in the given table. - [] if the table doesn't exist. 
- """ - assert isinstance(table, str) - if contains_metastrings(table): - raise ValueError, "bad table name: contains reserved metastrings" - - columnlist_key = _columns_key(table) - if not getattr(self.db, "has_key")(columnlist_key): - return [] - pickledcolumnlist = getattr(self.db, "get_bytes", - self.db.get)(columnlist_key) - if pickledcolumnlist: - return pickle.loads(pickledcolumnlist) - else: - return [] - - def ListTables(self): - """Return a list of tables in this database.""" - pickledtablelist = self.db.get_get(_table_names_key) - if pickledtablelist: - return pickle.loads(pickledtablelist) - else: - return [] - - def CreateOrExtendTable(self, table, columns): - """CreateOrExtendTable(table, columns) - - Create a new table in the database. - - If a table of this name already exists, extend it to have any - additional columns present in the given list as well as - all of its current columns. - """ - assert isinstance(columns, list) - - try: - self.CreateTable(table, columns) - except TableAlreadyExists: - # the table already existed, add any new columns - txn = None - try: - columnlist_key = _columns_key(table) - txn = self.env.txn_begin() - - # load the current column list - oldcolumnlist = pickle.loads( - getattr(self.db, "get_bytes", - self.db.get)(columnlist_key, txn=txn, flags=db.DB_RMW)) - # create a hash table for fast lookups of column names in the - # loop below - oldcolumnhash = {} - for c in oldcolumnlist: - oldcolumnhash[c] = c - - # create a new column list containing both the old and new - # column names - newcolumnlist = copy.copy(oldcolumnlist) - for c in columns: - if not c in oldcolumnhash: - newcolumnlist.append(c) - - # store the table's new extended column list - if newcolumnlist != oldcolumnlist : - # delete the old one first since we opened with DB_DUP - self.db.delete(columnlist_key, txn=txn) - getattr(self.db, "put_bytes", self.db.put)(columnlist_key, - pickle.dumps(newcolumnlist, 1), - txn=txn) - - txn.commit() - txn = None - - 
self.__load_column_info(table) - except db.DBError, dberror: - if txn: - txn.abort() - if sys.version_info < (2, 6) : - raise TableDBError, dberror[1] - else : - raise TableDBError, dberror.args[1] - - - def __load_column_info(self, table) : - """initialize the self.__tablecolumns dict""" - # check the column names - try: - tcolpickles = getattr(self.db, "get_bytes", - self.db.get)(_columns_key(table)) - except db.DBNotFoundError: - raise TableDBError, "unknown table: %r" % (table,) - if not tcolpickles: - raise TableDBError, "unknown table: %r" % (table,) - self.__tablecolumns[table] = pickle.loads(tcolpickles) - - def __new_rowid(self, table, txn) : - """Create a new unique row identifier""" - unique = 0 - while not unique: - # Generate a random 64-bit row ID string - # (note: might have <64 bits of true randomness - # but it's plenty for our database id needs!) - blist = [] - for x in xrange(_rowid_str_len): - blist.append(random.randint(0,255)) - newid = struct.pack('B'*_rowid_str_len, *blist) - - if sys.version_info[0] >= 3 : - newid = newid.decode("iso8859-1") # 8 bits - - # Guarantee uniqueness by adding this key to the database - try: - self.db.put(_rowid_key(table, newid), None, txn=txn, - flags=db.DB_NOOVERWRITE) - except db.DBKeyExistError: - pass - else: - unique = 1 - - return newid - - - def Insert(self, table, rowdict) : - """Insert(table, datadict) - Insert a new row into the table - using the keys+values from rowdict as the column values. 
- """ - - txn = None - try: - if not getattr(self.db, "has_key")(_columns_key(table)): - raise TableDBError, "unknown table" - - # check the validity of each column name - if not table in self.__tablecolumns: - self.__load_column_info(table) - for column in rowdict.keys() : - if not self.__tablecolumns[table].count(column): - raise TableDBError, "unknown column: %r" % (column,) - - # get a unique row identifier for this row - txn = self.env.txn_begin() - rowid = self.__new_rowid(table, txn=txn) - - # insert the row values into the table database - for column, dataitem in rowdict.items(): - # store the value - self.db.put(_data_key(table, column, rowid), dataitem, txn=txn) - - txn.commit() - txn = None - - except db.DBError, dberror: - # WIBNI we could just abort the txn and re-raise the exception? - # But no, because TableDBError is not related to DBError via - # inheritance, so it would be backwards incompatible. Do the next - # best thing. - info = sys.exc_info() - if txn: - txn.abort() - self.db.delete(_rowid_key(table, rowid)) - if sys.version_info < (2, 6) : - raise TableDBError, dberror[1], info[2] - else : - raise TableDBError, dberror.args[1], info[2] - - - def Modify(self, table, conditions={}, mappings={}): - """Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings' - - * table - the table name - * conditions - a dictionary keyed on column names containing - a condition callable expecting the data string as an - argument and returning a boolean. - * mappings - a dictionary keyed on column names containing a - condition callable expecting the data string as an argument and - returning the new string for that column. 
- """ - - try: - matching_rowids = self.__Select(table, [], conditions) - - # modify only requested columns - columns = mappings.keys() - for rowid in matching_rowids.keys(): - txn = None - try: - for column in columns: - txn = self.env.txn_begin() - # modify the requested column - try: - dataitem = self.db.get( - _data_key(table, column, rowid), - txn=txn) - self.db.delete( - _data_key(table, column, rowid), - txn=txn) - except db.DBNotFoundError: - # XXXXXXX row key somehow didn't exist, assume no - # error - dataitem = None - dataitem = mappings[column](dataitem) - if dataitem is not None: - self.db.put( - _data_key(table, column, rowid), - dataitem, txn=txn) - txn.commit() - txn = None - - # catch all exceptions here since we call unknown callables - except: - if txn: - txn.abort() - raise - - except db.DBError, dberror: - if sys.version_info < (2, 6) : - raise TableDBError, dberror[1] - else : - raise TableDBError, dberror.args[1] - - def Delete(self, table, conditions={}): - """Delete(table, conditions) - Delete items matching the given - conditions from the table. - - * conditions - a dictionary keyed on column names containing - condition functions expecting the data string as an - argument and returning a boolean. 
- """ - - try: - matching_rowids = self.__Select(table, [], conditions) - - # delete row data from all columns - columns = self.__tablecolumns[table] - for rowid in matching_rowids.keys(): - txn = None - try: - txn = self.env.txn_begin() - for column in columns: - # delete the data key - try: - self.db.delete(_data_key(table, column, rowid), - txn=txn) - except db.DBNotFoundError: - # XXXXXXX column may not exist, assume no error - pass - - try: - self.db.delete(_rowid_key(table, rowid), txn=txn) - except db.DBNotFoundError: - # XXXXXXX row key somehow didn't exist, assume no error - pass - txn.commit() - txn = None - except db.DBError, dberror: - if txn: - txn.abort() - raise - except db.DBError, dberror: - if sys.version_info < (2, 6) : - raise TableDBError, dberror[1] - else : - raise TableDBError, dberror.args[1] - - - def Select(self, table, columns, conditions={}): - """Select(table, columns, conditions) - retrieve specific row data - Returns a list of row column->value mapping dictionaries. - - * columns - a list of which column data to return. If - columns is None, all columns will be returned. - * conditions - a dictionary keyed on column names - containing callable conditions expecting the data string as an - argument and returning a boolean. - """ - try: - if not table in self.__tablecolumns: - self.__load_column_info(table) - if columns is None: - columns = self.__tablecolumns[table] - matching_rowids = self.__Select(table, columns, conditions) - except db.DBError, dberror: - if sys.version_info < (2, 6) : - raise TableDBError, dberror[1] - else : - raise TableDBError, dberror.args[1] - # return the matches as a list of dictionaries - return matching_rowids.values() - - - def __Select(self, table, columns, conditions): - """__Select() - Used to implement Select and Delete (above) - Returns a dictionary keyed on rowids containing dicts - holding the row data for columns listed in the columns param - that match the given conditions. 
- * conditions is a dictionary keyed on column names - containing callable conditions expecting the data string as an - argument and returning a boolean. - """ - # check the validity of each column name - if not table in self.__tablecolumns: - self.__load_column_info(table) - if columns is None: - columns = self.tablecolumns[table] - for column in (columns + conditions.keys()): - if not self.__tablecolumns[table].count(column): - raise TableDBError, "unknown column: %r" % (column,) - - # keyed on rows that match so far, containings dicts keyed on - # column names containing the data for that row and column. - matching_rowids = {} - # keys are rowids that do not match - rejected_rowids = {} - - # attempt to sort the conditions in such a way as to minimize full - # column lookups - def cmp_conditions(atuple, btuple): - a = atuple[1] - b = btuple[1] - if type(a) is type(b): - - # Needed for python 3. "cmp" vanished in 3.0.1 - def cmp(a, b) : - if a==b : return 0 - if a 0: - for rowid, rowdata in matching_rowids.items(): - for column in columns: - if column in rowdata: - continue - try: - rowdata[column] = self.db.get( - _data_key(table, column, rowid)) - except db.DBError, dberror: - if sys.version_info < (2, 6) : - if dberror[0] != db.DB_NOTFOUND: - raise - else : - if dberror.args[0] != db.DB_NOTFOUND: - raise - rowdata[column] = None - - # return the matches - return matching_rowids - - - def Drop(self, table): - """Remove an entire table from the database""" - txn = None - try: - txn = self.env.txn_begin() - - # delete the column list - self.db.delete(_columns_key(table), txn=txn) - - cur = self.db.cursor(txn) - - # delete all keys containing this tables column and row info - table_key = _search_all_data_key(table) - while 1: - try: - key, data = cur.set_range(table_key) - except db.DBNotFoundError: - break - # only delete items in this table - if key[:len(table_key)] != table_key: - break - cur.delete() - - # delete all rowids used by this table - table_key = 
_search_rowid_key(table) - while 1: - try: - key, data = cur.set_range(table_key) - except db.DBNotFoundError: - break - # only delete items in this table - if key[:len(table_key)] != table_key: - break - cur.delete() - - cur.close() - - # delete the tablename from the table name list - tablelist = pickle.loads( - getattr(self.db, "get_bytes", self.db.get)(_table_names_key, - txn=txn, flags=db.DB_RMW)) - try: - tablelist.remove(table) - except ValueError: - # hmm, it wasn't there, oh well, that's what we want. - pass - # delete 1st, incase we opened with DB_DUP - self.db.delete(_table_names_key, txn=txn) - getattr(self.db, "put_bytes", self.db.put)(_table_names_key, - pickle.dumps(tablelist, 1), txn=txn) - - txn.commit() - txn = None - - if table in self.__tablecolumns: - del self.__tablecolumns[table] - - except db.DBError, dberror: - if txn: - txn.abort() - raise TableDBError(dberror.args[1]) diff --git a/python/Lib/bsddb/dbutils.py b/python/Lib/bsddb/dbutils.py deleted file mode 100755 index 02a686f5d9..0000000000 --- a/python/Lib/bsddb/dbutils.py +++ /dev/null @@ -1,83 +0,0 @@ -#------------------------------------------------------------------------ -# -# Copyright (C) 2000 Autonomous Zone Industries -# -# License: This is free software. You may use this software for any -# purpose including modification/redistribution, so long as -# this header remains intact and that you do not claim any -# rights of ownership or authorship of this software. This -# software has been tested, but no warranty is expressed or -# implied. -# -# Author: Gregory P. Smith -# -# Note: I don't know how useful this is in reality since when a -# DBLockDeadlockError happens the current transaction is supposed to be -# aborted. If it doesn't then when the operation is attempted again -# the deadlock is still happening... 
-# --Robin -# -#------------------------------------------------------------------------ - - -# -# import the time.sleep function in a namespace safe way to allow -# "from bsddb.dbutils import *" -# -from time import sleep as _sleep - -import sys -absolute_import = (sys.version_info[0] >= 3) -if absolute_import : - # Because this syntaxis is not valid before Python 2.5 - exec("from . import db") -else : - import db - -# always sleep at least N seconds between retrys -_deadlock_MinSleepTime = 1.0/128 -# never sleep more than N seconds between retrys -_deadlock_MaxSleepTime = 3.14159 - -# Assign a file object to this for a "sleeping" message to be written to it -# each retry -_deadlock_VerboseFile = None - - -def DeadlockWrap(function, *_args, **_kwargs): - """DeadlockWrap(function, *_args, **_kwargs) - automatically retries - function in case of a database deadlock. - - This is a function intended to be used to wrap database calls such - that they perform retrys with exponentially backing off sleeps in - between when a DBLockDeadlockError exception is raised. - - A 'max_retries' parameter may optionally be passed to prevent it - from retrying forever (in which case the exception will be reraised). - - d = DB(...) - d.open(...) 
- DeadlockWrap(d.put, "foo", data="bar") # set key "foo" to "bar" - """ - sleeptime = _deadlock_MinSleepTime - max_retries = _kwargs.get('max_retries', -1) - if 'max_retries' in _kwargs: - del _kwargs['max_retries'] - while True: - try: - return function(*_args, **_kwargs) - except db.DBLockDeadlockError: - if _deadlock_VerboseFile: - _deadlock_VerboseFile.write( - 'dbutils.DeadlockWrap: sleeping %1.3f\n' % sleeptime) - _sleep(sleeptime) - # exponential backoff in the sleep time - sleeptime *= 2 - if sleeptime > _deadlock_MaxSleepTime: - sleeptime = _deadlock_MaxSleepTime - max_retries -= 1 - if max_retries == -1: - raise - - -#------------------------------------------------------------------------ diff --git a/python/Lib/cProfile.py b/python/Lib/cProfile.py deleted file mode 100755 index b2efd047d3..0000000000 --- a/python/Lib/cProfile.py +++ /dev/null @@ -1,199 +0,0 @@ -#! /usr/bin/env python - -"""Python interface for the 'lsprof' profiler. - Compatible with the 'profile' module. -""" - -__all__ = ["run", "runctx", "help", "Profile"] - -import _lsprof - -# ____________________________________________________________ -# Simple interface - -def run(statement, filename=None, sort=-1): - """Run statement under profiler optionally saving results in filename - - This function takes a single argument that can be passed to the - "exec" statement, and an optional file name. In all cases this - routine attempts to "exec" its first argument and gather profiling - statistics from the execution. If no file name is present, then this - function automatically prints a simple profiling report, sorted by the - standard name string (file/line/function-name) that is presented in - each line. 
- """ - prof = Profile() - result = None - try: - try: - prof = prof.run(statement) - except SystemExit: - pass - finally: - if filename is not None: - prof.dump_stats(filename) - else: - result = prof.print_stats(sort) - return result - -def runctx(statement, globals, locals, filename=None, sort=-1): - """Run statement under profiler, supplying your own globals and locals, - optionally saving results in filename. - - statement and filename have the same semantics as profile.run - """ - prof = Profile() - result = None - try: - try: - prof = prof.runctx(statement, globals, locals) - except SystemExit: - pass - finally: - if filename is not None: - prof.dump_stats(filename) - else: - result = prof.print_stats(sort) - return result - -# Backwards compatibility. -def help(): - print "Documentation for the profile/cProfile modules can be found " - print "in the Python Library Reference, section 'The Python Profiler'." - -# ____________________________________________________________ - -class Profile(_lsprof.Profiler): - """Profile(custom_timer=None, time_unit=None, subcalls=True, builtins=True) - - Builds a profiler object using the specified timer function. - The default timer is a fast built-in one based on real time. - For custom timer functions returning integers, time_unit can - be a float specifying a scale (i.e. how long each integer unit - is, in seconds). - """ - - # Most of the functionality is in the base class. - # This subclass only adds convenient and backward-compatible methods. 
- - def print_stats(self, sort=-1): - import pstats - pstats.Stats(self).strip_dirs().sort_stats(sort).print_stats() - - def dump_stats(self, file): - import marshal - f = open(file, 'wb') - self.create_stats() - marshal.dump(self.stats, f) - f.close() - - def create_stats(self): - self.disable() - self.snapshot_stats() - - def snapshot_stats(self): - entries = self.getstats() - self.stats = {} - callersdicts = {} - # call information - for entry in entries: - func = label(entry.code) - nc = entry.callcount # ncalls column of pstats (before '/') - cc = nc - entry.reccallcount # ncalls column of pstats (after '/') - tt = entry.inlinetime # tottime column of pstats - ct = entry.totaltime # cumtime column of pstats - callers = {} - callersdicts[id(entry.code)] = callers - self.stats[func] = cc, nc, tt, ct, callers - # subcall information - for entry in entries: - if entry.calls: - func = label(entry.code) - for subentry in entry.calls: - try: - callers = callersdicts[id(subentry.code)] - except KeyError: - continue - nc = subentry.callcount - cc = nc - subentry.reccallcount - tt = subentry.inlinetime - ct = subentry.totaltime - if func in callers: - prev = callers[func] - nc += prev[0] - cc += prev[1] - tt += prev[2] - ct += prev[3] - callers[func] = nc, cc, tt, ct - - # The following two methods can be called by clients to use - # a profiler to profile a statement, given as a string. - - def run(self, cmd): - import __main__ - dict = __main__.__dict__ - return self.runctx(cmd, dict, dict) - - def runctx(self, cmd, globals, locals): - self.enable() - try: - exec cmd in globals, locals - finally: - self.disable() - return self - - # This method is more useful to profile a single function call. 
- def runcall(self, func, *args, **kw): - self.enable() - try: - return func(*args, **kw) - finally: - self.disable() - -# ____________________________________________________________ - -def label(code): - if isinstance(code, str): - return ('~', 0, code) # built-in functions ('~' sorts at the end) - else: - return (code.co_filename, code.co_firstlineno, code.co_name) - -# ____________________________________________________________ - -def main(): - import os, sys - from optparse import OptionParser - usage = "cProfile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." - parser = OptionParser(usage=usage) - parser.allow_interspersed_args = False - parser.add_option('-o', '--outfile', dest="outfile", - help="Save stats to ", default=None) - parser.add_option('-s', '--sort', dest="sort", - help="Sort order when printing to stdout, based on pstats.Stats class", - default=-1) - - if not sys.argv[1:]: - parser.print_usage() - sys.exit(2) - - (options, args) = parser.parse_args() - sys.argv[:] = args - - if len(args) > 0: - progname = args[0] - sys.path.insert(0, os.path.dirname(progname)) - with open(progname, 'rb') as fp: - code = compile(fp.read(), progname, 'exec') - globs = { - '__file__': progname, - '__name__': '__main__', - '__package__': None, - } - runctx(code, globs, None, options.outfile, options.sort) - else: - parser.print_usage() - return parser - -# When invoked as main program, invoke the profiler on a script -if __name__ == '__main__': - main() diff --git a/python/Lib/calendar.py b/python/Lib/calendar.py deleted file mode 100755 index 477294448a..0000000000 --- a/python/Lib/calendar.py +++ /dev/null @@ -1,714 +0,0 @@ -"""Calendar printing functions - -Note when comparing these calendars to the ones printed by cal(1): By -default, these calendars have Monday as the first day of the week, and -Sunday as the last (the European convention). 
Use setfirstweekday() to -set the first day of the week (0=Monday, 6=Sunday).""" - -import sys -import datetime -import locale as _locale - -__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", - "firstweekday", "isleap", "leapdays", "weekday", "monthrange", - "monthcalendar", "prmonth", "month", "prcal", "calendar", - "timegm", "month_name", "month_abbr", "day_name", "day_abbr"] - -# Exception raised for bad input (with string parameter for details) -error = ValueError - -# Exceptions raised for bad input -class IllegalMonthError(ValueError): - def __init__(self, month): - self.month = month - def __str__(self): - return "bad month number %r; must be 1-12" % self.month - - -class IllegalWeekdayError(ValueError): - def __init__(self, weekday): - self.weekday = weekday - def __str__(self): - return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday - - -# Constants for months referenced later -January = 1 -February = 2 - -# Number of days per month (except for February in leap years) -mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - -# This module used to have hard-coded lists of day and month names, as -# English strings. The classes following emulate a read-only version of -# that, but supply localized names. Note that the values are computed -# fresh on each call, in case the user changes locale between calls. - -class _localized_month: - - _months = [datetime.date(2001, i+1, 1).strftime for i in range(12)] - _months.insert(0, lambda x: "") - - def __init__(self, format): - self.format = format - - def __getitem__(self, i): - funcs = self._months[i] - if isinstance(i, slice): - return [f(self.format) for f in funcs] - else: - return funcs(self.format) - - def __len__(self): - return 13 - - -class _localized_day: - - # January 1, 2001, was a Monday. 
- _days = [datetime.date(2001, 1, i+1).strftime for i in range(7)] - - def __init__(self, format): - self.format = format - - def __getitem__(self, i): - funcs = self._days[i] - if isinstance(i, slice): - return [f(self.format) for f in funcs] - else: - return funcs(self.format) - - def __len__(self): - return 7 - - -# Full and abbreviated names of weekdays -day_name = _localized_day('%A') -day_abbr = _localized_day('%a') - -# Full and abbreviated names of months (1-based arrays!!!) -month_name = _localized_month('%B') -month_abbr = _localized_month('%b') - -# Constants for weekdays -(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7) - - -def isleap(year): - """Return True for leap years, False for non-leap years.""" - return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) - - -def leapdays(y1, y2): - """Return number of leap years in range [y1, y2). - Assume y1 <= y2.""" - y1 -= 1 - y2 -= 1 - return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400) - - -def weekday(year, month, day): - """Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12), - day (1-31).""" - return datetime.date(year, month, day).weekday() - - -def monthrange(year, month): - """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for - year, month.""" - if not 1 <= month <= 12: - raise IllegalMonthError(month) - day1 = weekday(year, month, 1) - ndays = mdays[month] + (month == February and isleap(year)) - return day1, ndays - - -class Calendar(object): - """ - Base calendar class. This class doesn't do any formatting. It simply - provides data to subclasses. 
- """ - - def __init__(self, firstweekday=0): - self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday - - def getfirstweekday(self): - return self._firstweekday % 7 - - def setfirstweekday(self, firstweekday): - self._firstweekday = firstweekday - - firstweekday = property(getfirstweekday, setfirstweekday) - - def iterweekdays(self): - """ - Return an iterator for one week of weekday numbers starting with the - configured first one. - """ - for i in range(self.firstweekday, self.firstweekday + 7): - yield i%7 - - def itermonthdates(self, year, month): - """ - Return an iterator for one month. The iterator will yield datetime.date - values and will always iterate through complete weeks, so it will yield - dates outside the specified month. - """ - date = datetime.date(year, month, 1) - # Go back to the beginning of the week - days = (date.weekday() - self.firstweekday) % 7 - date -= datetime.timedelta(days=days) - oneday = datetime.timedelta(days=1) - while True: - yield date - try: - date += oneday - except OverflowError: - # Adding one day could fail after datetime.MAXYEAR - break - if date.month != month and date.weekday() == self.firstweekday: - break - - def itermonthdays2(self, year, month): - """ - Like itermonthdates(), but will yield (day number, weekday number) - tuples. For days outside the specified month the day number is 0. - """ - for i, d in enumerate(self.itermonthdays(year, month), self.firstweekday): - yield d, i % 7 - - def itermonthdays(self, year, month): - """ - Like itermonthdates(), but will yield day numbers. For days outside - the specified month the day number is 0. 
- """ - day1, ndays = monthrange(year, month) - days_before = (day1 - self.firstweekday) % 7 - for _ in range(days_before): - yield 0 - for d in range(1, ndays + 1): - yield d - days_after = (self.firstweekday - day1 - ndays) % 7 - for _ in range(days_after): - yield 0 - - def monthdatescalendar(self, year, month): - """ - Return a matrix (list of lists) representing a month's calendar. - Each row represents a week; week entries are datetime.date values. - """ - dates = list(self.itermonthdates(year, month)) - return [ dates[i:i+7] for i in range(0, len(dates), 7) ] - - def monthdays2calendar(self, year, month): - """ - Return a matrix representing a month's calendar. - Each row represents a week; week entries are - (day number, weekday number) tuples. Day numbers outside this month - are zero. - """ - days = list(self.itermonthdays2(year, month)) - return [ days[i:i+7] for i in range(0, len(days), 7) ] - - def monthdayscalendar(self, year, month): - """ - Return a matrix representing a month's calendar. - Each row represents a week; days outside this month are zero. - """ - days = list(self.itermonthdays(year, month)) - return [ days[i:i+7] for i in range(0, len(days), 7) ] - - def yeardatescalendar(self, year, width=3): - """ - Return the data for the specified year ready for formatting. The return - value is a list of month rows. Each month row contains up to width months. - Each month contains between 4 and 6 weeks and each week contains 1-7 - days. Days are datetime.date objects. - """ - months = [ - self.monthdatescalendar(year, i) - for i in range(January, January+12) - ] - return [months[i:i+width] for i in range(0, len(months), width) ] - - def yeardays2calendar(self, year, width=3): - """ - Return the data for the specified year ready for formatting (similar to - yeardatescalendar()). Entries in the week lists are - (day number, weekday number) tuples. Day numbers outside this month are - zero. 
- """ - months = [ - self.monthdays2calendar(year, i) - for i in range(January, January+12) - ] - return [months[i:i+width] for i in range(0, len(months), width) ] - - def yeardayscalendar(self, year, width=3): - """ - Return the data for the specified year ready for formatting (similar to - yeardatescalendar()). Entries in the week lists are day numbers. - Day numbers outside this month are zero. - """ - months = [ - self.monthdayscalendar(year, i) - for i in range(January, January+12) - ] - return [months[i:i+width] for i in range(0, len(months), width) ] - - -class TextCalendar(Calendar): - """ - Subclass of Calendar that outputs a calendar as a simple plain text - similar to the UNIX program cal. - """ - - def prweek(self, theweek, width): - """ - Print a single week (no newline). - """ - print self.formatweek(theweek, width), - - def formatday(self, day, weekday, width): - """ - Returns a formatted day. - """ - if day == 0: - s = '' - else: - s = '%2i' % day # right-align single-digit days - return s.center(width) - - def formatweek(self, theweek, width): - """ - Returns a single week in a string (no newline). - """ - return ' '.join(self.formatday(d, wd, width) for (d, wd) in theweek) - - def formatweekday(self, day, width): - """ - Returns a formatted week day name. - """ - if width >= 9: - names = day_name - else: - names = day_abbr - return names[day][:width].center(width) - - def formatweekheader(self, width): - """ - Return a header for a week. - """ - return ' '.join(self.formatweekday(i, width) for i in self.iterweekdays()) - - def formatmonthname(self, theyear, themonth, width, withyear=True): - """ - Return a formatted month name. - """ - s = month_name[themonth] - if withyear: - s = "%s %r" % (s, theyear) - return s.center(width) - - def prmonth(self, theyear, themonth, w=0, l=0): - """ - Print a month's calendar. 
- """ - print self.formatmonth(theyear, themonth, w, l), - - def formatmonth(self, theyear, themonth, w=0, l=0): - """ - Return a month's calendar string (multi-line). - """ - w = max(2, w) - l = max(1, l) - s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) - s = s.rstrip() - s += '\n' * l - s += self.formatweekheader(w).rstrip() - s += '\n' * l - for week in self.monthdays2calendar(theyear, themonth): - s += self.formatweek(week, w).rstrip() - s += '\n' * l - return s - - def formatyear(self, theyear, w=2, l=1, c=6, m=3): - """ - Returns a year's calendar as a multi-line string. - """ - w = max(2, w) - l = max(1, l) - c = max(2, c) - colwidth = (w + 1) * 7 - 1 - v = [] - a = v.append - a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) - a('\n'*l) - header = self.formatweekheader(w) - for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): - # months in this row - months = range(m*i+1, min(m*(i+1)+1, 13)) - a('\n'*l) - names = (self.formatmonthname(theyear, k, colwidth, False) - for k in months) - a(formatstring(names, colwidth, c).rstrip()) - a('\n'*l) - headers = (header for k in months) - a(formatstring(headers, colwidth, c).rstrip()) - a('\n'*l) - # max number of weeks for this row - height = max(len(cal) for cal in row) - for j in range(height): - weeks = [] - for cal in row: - if j >= len(cal): - weeks.append('') - else: - weeks.append(self.formatweek(cal[j], w)) - a(formatstring(weeks, colwidth, c).rstrip()) - a('\n' * l) - return ''.join(v) - - def pryear(self, theyear, w=0, l=0, c=6, m=3): - """Print a year's calendar.""" - print self.formatyear(theyear, w, l, c, m) - - -class HTMLCalendar(Calendar): - """ - This calendar returns complete HTML pages. - """ - - # CSS classes for the day s - cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] - - def formatday(self, day, weekday): - """ - Return a day as a table cell. 
- """ - if day == 0: - return ' ' # day outside month - else: - return '%d' % (self.cssclasses[weekday], day) - - def formatweek(self, theweek): - """ - Return a complete week as a table row. - """ - s = ''.join(self.formatday(d, wd) for (d, wd) in theweek) - return '%s' % s - - def formatweekday(self, day): - """ - Return a weekday name as a table header. - """ - return '%s' % (self.cssclasses[day], day_abbr[day]) - - def formatweekheader(self): - """ - Return a header for a week as a table row. - """ - s = ''.join(self.formatweekday(i) for i in self.iterweekdays()) - return '%s' % s - - def formatmonthname(self, theyear, themonth, withyear=True): - """ - Return a month name as a table row. - """ - if withyear: - s = '%s %s' % (month_name[themonth], theyear) - else: - s = '%s' % month_name[themonth] - return '%s' % s - - def formatmonth(self, theyear, themonth, withyear=True): - """ - Return a formatted month as a table. - """ - v = [] - a = v.append - a('') - a('\n') - a(self.formatmonthname(theyear, themonth, withyear=withyear)) - a('\n') - a(self.formatweekheader()) - a('\n') - for week in self.monthdays2calendar(theyear, themonth): - a(self.formatweek(week)) - a('\n') - a('
') - a('\n') - return ''.join(v) - - def formatyear(self, theyear, width=3): - """ - Return a formatted year as a table of tables. - """ - v = [] - a = v.append - width = max(width, 1) - a('') - a('\n') - a('' % (width, theyear)) - for i in range(January, January+12, width): - # months in this row - months = range(i, min(i+width, 13)) - a('') - for m in months: - a('') - a('') - a('
%s
') - a(self.formatmonth(theyear, m, withyear=False)) - a('
') - return ''.join(v) - - def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None): - """ - Return a formatted year as a complete HTML page. - """ - if encoding is None: - encoding = sys.getdefaultencoding() - v = [] - a = v.append - a('\n' % encoding) - a('\n') - a('\n') - a('\n') - a('\n' % encoding) - if css is not None: - a('\n' % css) - a('Calendar for %d\n' % theyear) - a('\n') - a('\n') - a(self.formatyear(theyear, width)) - a('\n') - a('\n') - return ''.join(v).encode(encoding, "xmlcharrefreplace") - - -class TimeEncoding: - def __init__(self, locale): - self.locale = locale - - def __enter__(self): - self.oldlocale = _locale.getlocale(_locale.LC_TIME) - _locale.setlocale(_locale.LC_TIME, self.locale) - return _locale.getlocale(_locale.LC_TIME)[1] - - def __exit__(self, *args): - _locale.setlocale(_locale.LC_TIME, self.oldlocale) - - -class LocaleTextCalendar(TextCalendar): - """ - This class can be passed a locale name in the constructor and will return - month and weekday names in the specified locale. If this locale includes - an encoding all strings containing month and weekday names will be returned - as unicode. 
- """ - - def __init__(self, firstweekday=0, locale=None): - TextCalendar.__init__(self, firstweekday) - if locale is None: - locale = _locale.getdefaultlocale() - self.locale = locale - - def formatweekday(self, day, width): - with TimeEncoding(self.locale) as encoding: - if width >= 9: - names = day_name - else: - names = day_abbr - name = names[day] - if encoding is not None: - name = name.decode(encoding) - return name[:width].center(width) - - def formatmonthname(self, theyear, themonth, width, withyear=True): - with TimeEncoding(self.locale) as encoding: - s = month_name[themonth] - if encoding is not None: - s = s.decode(encoding) - if withyear: - s = "%s %r" % (s, theyear) - return s.center(width) - - -class LocaleHTMLCalendar(HTMLCalendar): - """ - This class can be passed a locale name in the constructor and will return - month and weekday names in the specified locale. If this locale includes - an encoding all strings containing month and weekday names will be returned - as unicode. 
- """ - def __init__(self, firstweekday=0, locale=None): - HTMLCalendar.__init__(self, firstweekday) - if locale is None: - locale = _locale.getdefaultlocale() - self.locale = locale - - def formatweekday(self, day): - with TimeEncoding(self.locale) as encoding: - s = day_abbr[day] - if encoding is not None: - s = s.decode(encoding) - return '%s' % (self.cssclasses[day], s) - - def formatmonthname(self, theyear, themonth, withyear=True): - with TimeEncoding(self.locale) as encoding: - s = month_name[themonth] - if encoding is not None: - s = s.decode(encoding) - if withyear: - s = '%s %s' % (s, theyear) - return '%s' % s - - -# Support for old module level interface -c = TextCalendar() - -firstweekday = c.getfirstweekday - -def setfirstweekday(firstweekday): - try: - firstweekday.__index__ - except AttributeError: - raise IllegalWeekdayError(firstweekday) - if not MONDAY <= firstweekday <= SUNDAY: - raise IllegalWeekdayError(firstweekday) - c.firstweekday = firstweekday - -monthcalendar = c.monthdayscalendar -prweek = c.prweek -week = c.formatweek -weekheader = c.formatweekheader -prmonth = c.prmonth -month = c.formatmonth -calendar = c.formatyear -prcal = c.pryear - - -# Spacing of month columns for multi-column year calendar -_colwidth = 7*3 - 1 # Amount printed by prweek() -_spacing = 6 # Number of spaces between columns - - -def format(cols, colwidth=_colwidth, spacing=_spacing): - """Prints multi-column formatting for year calendars""" - print formatstring(cols, colwidth, spacing) - - -def formatstring(cols, colwidth=_colwidth, spacing=_spacing): - """Returns a string formatted from n strings, centered within n columns.""" - spacing *= ' ' - return spacing.join(c.center(colwidth) for c in cols) - - -EPOCH = 1970 -_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal() - - -def timegm(tuple): - """Unrelated but handy function to calculate Unix timestamp from GMT.""" - year, month, day, hour, minute, second = tuple[:6] - days = datetime.date(year, month, 
1).toordinal() - _EPOCH_ORD + day - 1 - hours = days*24 + hour - minutes = hours*60 + minute - seconds = minutes*60 + second - return seconds - - -def main(args): - import optparse - parser = optparse.OptionParser(usage="usage: %prog [options] [year [month]]") - parser.add_option( - "-w", "--width", - dest="width", type="int", default=2, - help="width of date column (default 2, text only)" - ) - parser.add_option( - "-l", "--lines", - dest="lines", type="int", default=1, - help="number of lines for each week (default 1, text only)" - ) - parser.add_option( - "-s", "--spacing", - dest="spacing", type="int", default=6, - help="spacing between months (default 6, text only)" - ) - parser.add_option( - "-m", "--months", - dest="months", type="int", default=3, - help="months per row (default 3, text only)" - ) - parser.add_option( - "-c", "--css", - dest="css", default="calendar.css", - help="CSS to use for page (html only)" - ) - parser.add_option( - "-L", "--locale", - dest="locale", default=None, - help="locale to be used from month and weekday names" - ) - parser.add_option( - "-e", "--encoding", - dest="encoding", default=None, - help="Encoding to use for output" - ) - parser.add_option( - "-t", "--type", - dest="type", default="text", - choices=("text", "html"), - help="output type (text or html)" - ) - - (options, args) = parser.parse_args(args) - - if options.locale and not options.encoding: - parser.error("if --locale is specified --encoding is required") - sys.exit(1) - - locale = options.locale, options.encoding - - if options.type == "html": - if options.locale: - cal = LocaleHTMLCalendar(locale=locale) - else: - cal = HTMLCalendar() - encoding = options.encoding - if encoding is None: - encoding = sys.getdefaultencoding() - optdict = dict(encoding=encoding, css=options.css) - if len(args) == 1: - print cal.formatyearpage(datetime.date.today().year, **optdict) - elif len(args) == 2: - print cal.formatyearpage(int(args[1]), **optdict) - else: - 
parser.error("incorrect number of arguments") - sys.exit(1) - else: - if options.locale: - cal = LocaleTextCalendar(locale=locale) - else: - cal = TextCalendar() - optdict = dict(w=options.width, l=options.lines) - if len(args) != 3: - optdict["c"] = options.spacing - optdict["m"] = options.months - if len(args) == 1: - result = cal.formatyear(datetime.date.today().year, **optdict) - elif len(args) == 2: - result = cal.formatyear(int(args[1]), **optdict) - elif len(args) == 3: - result = cal.formatmonth(int(args[1]), int(args[2]), **optdict) - else: - parser.error("incorrect number of arguments") - sys.exit(1) - if options.encoding: - result = result.encode(options.encoding) - print result - - -if __name__ == "__main__": - main(sys.argv) diff --git a/python/Lib/cgi.py b/python/Lib/cgi.py deleted file mode 100755 index 7c51b44db1..0000000000 --- a/python/Lib/cgi.py +++ /dev/null @@ -1,1059 +0,0 @@ -#! /usr/local/bin/python - -# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is -# intentionally NOT "/usr/bin/env python". On many systems -# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI -# scripts, and /usr/local/bin is the default directory where Python is -# installed, so /usr/bin/env would be unable to find python. Granted, -# binary installations by Linux vendors often install Python in -# /usr/bin. So let those vendors patch cgi.py to match their choice -# of installation. - -"""Support module for CGI (Common Gateway Interface) scripts. - -This module defines a number of utilities for use by CGI scripts -written in Python. -""" - -# XXX Perhaps there should be a slimmed version that doesn't contain -# all those backwards compatible and debugging classes and functions? - -# History -# ------- -# -# Michael McLay started this module. Steve Majewski changed the -# interface to SvFormContentDict and FormContentDict. The multipart -# parsing was inspired by code submitted by Andreas Paepcke. 
Guido van -# Rossum rewrote, reformatted and documented the module and is currently -# responsible for its maintenance. -# - -__version__ = "2.6" - - -# Imports -# ======= - -from operator import attrgetter -import sys -import os -import UserDict -import urlparse - -from warnings import filterwarnings, catch_warnings, warn -with catch_warnings(): - if sys.py3kwarning: - filterwarnings("ignore", ".*mimetools has been removed", - DeprecationWarning) - filterwarnings("ignore", ".*rfc822 has been removed", - DeprecationWarning) - import mimetools - import rfc822 - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -__all__ = ["MiniFieldStorage", "FieldStorage", "FormContentDict", - "SvFormContentDict", "InterpFormContentDict", "FormContent", - "parse", "parse_qs", "parse_qsl", "parse_multipart", - "parse_header", "print_exception", "print_environ", - "print_form", "print_directory", "print_arguments", - "print_environ_usage", "escape"] - -# Logging support -# =============== - -logfile = "" # Filename to log to, if not empty -logfp = None # File object to log to, if not None - -def initlog(*allargs): - """Write a log message, if there is a log file. - - Even though this function is called initlog(), you should always - use log(); log is a variable that is set either to initlog - (initially), to dolog (once the log file has been opened), or to - nolog (when logging is disabled). - - The first argument is a format string; the remaining arguments (if - any) are arguments to the % operator, so e.g. - log("%s: %s", "a", "b") - will write "a: b" to the log file, followed by a newline. - - If the global logfp is not None, it should be a file object to - which log data is written. - - If the global logfp is None, the global logfile may be a string - giving a filename to open, in append mode. This file should be - world writable!!! 
If the file can't be opened, logging is - silently disabled (since there is no safe place where we could - send an error message). - - """ - global logfp, log - if logfile and not logfp: - try: - logfp = open(logfile, "a") - except IOError: - pass - if not logfp: - log = nolog - else: - log = dolog - log(*allargs) - -def dolog(fmt, *args): - """Write a log message to the log file. See initlog() for docs.""" - logfp.write(fmt%args + "\n") - -def nolog(*allargs): - """Dummy function, assigned to log when logging is disabled.""" - pass - -log = initlog # The current logging function - - -# Parsing functions -# ================= - -# Maximum input we will accept when REQUEST_METHOD is POST -# 0 ==> unlimited input -maxlen = 0 - -def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): - """Parse a query in the environment or from a file (default stdin) - - Arguments, all optional: - - fp : file pointer; default: sys.stdin - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. 
- """ - if fp is None: - fp = sys.stdin - if not 'REQUEST_METHOD' in environ: - environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone - if environ['REQUEST_METHOD'] == 'POST': - ctype, pdict = parse_header(environ['CONTENT_TYPE']) - if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict) - elif ctype == 'application/x-www-form-urlencoded': - clength = int(environ['CONTENT_LENGTH']) - if maxlen and clength > maxlen: - raise ValueError, 'Maximum content length exceeded' - qs = fp.read(clength) - else: - qs = '' # Unknown content-type - if 'QUERY_STRING' in environ: - if qs: qs = qs + '&' - qs = qs + environ['QUERY_STRING'] - elif sys.argv[1:]: - if qs: qs = qs + '&' - qs = qs + sys.argv[1] - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - elif 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - else: - if sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) - - -# parse query string function called from urlparse, -# this is done in order to maintain backward compatibility. - -def parse_qs(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument.""" - warn("cgi.parse_qs is deprecated, use urlparse.parse_qs instead", - PendingDeprecationWarning, 2) - return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) - - -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument.""" - warn("cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead", - PendingDeprecationWarning, 2) - return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing) - -def parse_multipart(fp, pdict): - """Parse multipart input. - - Arguments: - fp : input file - pdict: dictionary containing other parameters of content-type header - - Returns a dictionary just like parse_qs(): keys are the field names, each - value is a list of values for that field. 
This is easy to use but not - much good if you are expecting megabytes to be uploaded -- in that case, - use the FieldStorage class instead which is much more flexible. Note - that content-type is the raw, unparsed contents of the content-type - header. - - XXX This does not parse nested multipart parts -- use FieldStorage for - that. - - XXX This should really be subsumed by FieldStorage altogether -- no - point in having two implementations of the same parsing algorithm. - Also, FieldStorage protects itself better against certain DoS attacks - by limiting the size of the data read in one chunk. The API here - does not support that kind of protection. This also affects parse() - since it can call parse_multipart(). - - """ - boundary = "" - if 'boundary' in pdict: - boundary = pdict['boundary'] - if not valid_boundary(boundary): - raise ValueError, ('Invalid boundary in multipart form: %r' - % (boundary,)) - - nextpart = "--" + boundary - lastpart = "--" + boundary + "--" - partdict = {} - terminator = "" - - while terminator != lastpart: - bytes = -1 - data = None - if terminator: - # At start of next part. Read headers first. - headers = mimetools.Message(fp) - clength = headers.getheader('content-length') - if clength: - try: - bytes = int(clength) - except ValueError: - pass - if bytes > 0: - if maxlen and bytes > maxlen: - raise ValueError, 'Maximum content length exceeded' - data = fp.read(bytes) - else: - data = "" - # Read lines until end of part. - lines = [] - while 1: - line = fp.readline() - if not line: - terminator = lastpart # End outer loop - break - if line[:2] == "--": - terminator = line.strip() - if terminator in (nextpart, lastpart): - break - lines.append(line) - # Done with part. 
- if data is None: - continue - if bytes < 0: - if lines: - # Strip final line terminator - line = lines[-1] - if line[-2:] == "\r\n": - line = line[:-2] - elif line[-1:] == "\n": - line = line[:-1] - lines[-1] = line - data = "".join(lines) - line = headers['content-disposition'] - if not line: - continue - key, params = parse_header(line) - if key != 'form-data': - continue - if 'name' in params: - name = params['name'] - else: - continue - if name in partdict: - partdict[name].append(data) - else: - partdict[name] = [data] - - return partdict - - -def _parseparam(s): - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - -def parse_header(line): - """Parse a Content-type like header. - - Return the main content-type and a dictionary of options. - - """ - parts = _parseparam(';' + line) - key = parts.next() - pdict = {} - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i+1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace('\\\\', '\\').replace('\\"', '"') - pdict[name] = value - return key, pdict - - -# Classes for field storage -# ========================= - -class MiniFieldStorage: - - """Like FieldStorage, for use when no file uploads are possible.""" - - # Dummy attributes - filename = None - list = None - type = None - file = None - type_options = {} - disposition = None - disposition_options = {} - headers = {} - - def __init__(self, name, value): - """Constructor from field name and value.""" - self.name = name - self.value = value - # self.file = StringIO(value) - - def __repr__(self): - """Return printable representation.""" - return "MiniFieldStorage(%r, %r)" % (self.name, self.value) - - -class FieldStorage: - - """Store a sequence of fields, reading multipart/form-data. 
- - This class provides naming, typing, files stored on disk, and - more. At the top level, it is accessible like a dictionary, whose - keys are the field names. (Note: None can occur as a field name.) - The items are either a Python list (if there's multiple values) or - another FieldStorage or MiniFieldStorage object. If it's a single - object, it has the following attributes: - - name: the field name, if specified; otherwise None - - filename: the filename, if specified; otherwise None; this is the - client side filename, *not* the file name on which it is - stored (that's a temporary file you don't deal with) - - value: the value as a *string*; for file uploads, this - transparently reads the file every time you request the value - - file: the file(-like) object from which you can read the data; - None if the data is stored a simple string - - type: the content-type, or None if not specified - - type_options: dictionary of options specified on the content-type - line - - disposition: content-disposition, or None if not specified - - disposition_options: dictionary of corresponding options - - headers: a dictionary(-like) object (sometimes rfc822.Message or a - subclass thereof) containing *all* headers - - The class is subclassable, mostly for the purpose of overriding - the make_file() method, which is called internally to come up with - a file open for reading and writing. This makes it possible to - override the default choice of storing all files in a temporary - directory and unlinking them as soon as they have been opened. - - """ - - def __init__(self, fp=None, headers=None, outerboundary="", - environ=os.environ, keep_blank_values=0, strict_parsing=0): - """Constructor. Read multipart/* until last part. 
- - Arguments, all optional: - - fp : file pointer; default: sys.stdin - (not used when the request method is GET) - - headers : header dictionary-like object; default: - taken from environ as per CGI spec - - outerboundary : terminating multipart boundary - (for internal use only) - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - """ - method = 'GET' - self.keep_blank_values = keep_blank_values - self.strict_parsing = strict_parsing - if 'REQUEST_METHOD' in environ: - method = environ['REQUEST_METHOD'].upper() - self.qs_on_post = None - if method == 'GET' or method == 'HEAD': - if 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - elif sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - fp = StringIO(qs) - if headers is None: - headers = {'content-type': - "application/x-www-form-urlencoded"} - if headers is None: - headers = {} - if method == 'POST': - # Set default content-type for POST to what's traditional - headers['content-type'] = "application/x-www-form-urlencoded" - if 'CONTENT_TYPE' in environ: - headers['content-type'] = environ['CONTENT_TYPE'] - if 'QUERY_STRING' in environ: - self.qs_on_post = environ['QUERY_STRING'] - if 'CONTENT_LENGTH' in environ: - headers['content-length'] = environ['CONTENT_LENGTH'] - self.fp = fp or sys.stdin - self.headers = headers - self.outerboundary = outerboundary - - # Process content-disposition header - cdisp, pdict = "", {} - if 'content-disposition' in self.headers: - cdisp, pdict = 
parse_header(self.headers['content-disposition']) - self.disposition = cdisp - self.disposition_options = pdict - self.name = None - if 'name' in pdict: - self.name = pdict['name'] - self.filename = None - if 'filename' in pdict: - self.filename = pdict['filename'] - - # Process content-type header - # - # Honor any existing content-type header. But if there is no - # content-type header, use some sensible defaults. Assume - # outerboundary is "" at the outer level, but something non-false - # inside a multi-part. The default for an inner part is text/plain, - # but for an outer part it should be urlencoded. This should catch - # bogus clients which erroneously forget to include a content-type - # header. - # - # See below for what we do if there does exist a content-type header, - # but it happens to be something we don't understand. - if 'content-type' in self.headers: - ctype, pdict = parse_header(self.headers['content-type']) - elif self.outerboundary or method != 'POST': - ctype, pdict = "text/plain", {} - else: - ctype, pdict = 'application/x-www-form-urlencoded', {} - self.type = ctype - self.type_options = pdict - self.innerboundary = "" - if 'boundary' in pdict: - self.innerboundary = pdict['boundary'] - clen = -1 - if 'content-length' in self.headers: - try: - clen = int(self.headers['content-length']) - except ValueError: - pass - if maxlen and clen > maxlen: - raise ValueError, 'Maximum content length exceeded' - self.length = clen - - self.list = self.file = None - self.done = 0 - if ctype == 'application/x-www-form-urlencoded': - self.read_urlencoded() - elif ctype[:10] == 'multipart/': - self.read_multi(environ, keep_blank_values, strict_parsing) - else: - self.read_single() - - def __repr__(self): - """Return a printable representation.""" - return "FieldStorage(%r, %r, %r)" % ( - self.name, self.filename, self.value) - - def __iter__(self): - return iter(self.keys()) - - def __getattr__(self, name): - if name != 'value': - raise AttributeError, 
name - if self.file: - self.file.seek(0) - value = self.file.read() - self.file.seek(0) - elif self.list is not None: - value = self.list - else: - value = None - return value - - def __getitem__(self, key): - """Dictionary style indexing.""" - if self.list is None: - raise TypeError, "not indexable" - found = [] - for item in self.list: - if item.name == key: found.append(item) - if not found: - raise KeyError, key - if len(found) == 1: - return found[0] - else: - return found - - def getvalue(self, key, default=None): - """Dictionary style get() method, including 'value' lookup.""" - if key in self: - value = self[key] - if type(value) is type([]): - return map(attrgetter('value'), value) - else: - return value.value - else: - return default - - def getfirst(self, key, default=None): - """ Return the first value received.""" - if key in self: - value = self[key] - if type(value) is type([]): - return value[0].value - else: - return value.value - else: - return default - - def getlist(self, key): - """ Return list of received values.""" - if key in self: - value = self[key] - if type(value) is type([]): - return map(attrgetter('value'), value) - else: - return [value.value] - else: - return [] - - def keys(self): - """Dictionary style keys() method.""" - if self.list is None: - raise TypeError, "not indexable" - return list(set(item.name for item in self.list)) - - def has_key(self, key): - """Dictionary style has_key() method.""" - if self.list is None: - raise TypeError, "not indexable" - return any(item.name == key for item in self.list) - - def __contains__(self, key): - """Dictionary style __contains__ method.""" - if self.list is None: - raise TypeError, "not indexable" - return any(item.name == key for item in self.list) - - def __len__(self): - """Dictionary style len(x) support.""" - return len(self.keys()) - - def __nonzero__(self): - return bool(self.list) - - def read_urlencoded(self): - """Internal: read data in query string format.""" - qs = 
self.fp.read(self.length) - if self.qs_on_post: - qs += '&' + self.qs_on_post - self.list = list = [] - for key, value in urlparse.parse_qsl(qs, self.keep_blank_values, - self.strict_parsing): - list.append(MiniFieldStorage(key, value)) - self.skip_lines() - - FieldStorageClass = None - - def read_multi(self, environ, keep_blank_values, strict_parsing): - """Internal: read a part that is itself multipart.""" - ib = self.innerboundary - if not valid_boundary(ib): - raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,) - self.list = [] - if self.qs_on_post: - for key, value in urlparse.parse_qsl(self.qs_on_post, - self.keep_blank_values, self.strict_parsing): - self.list.append(MiniFieldStorage(key, value)) - FieldStorageClass = None - - klass = self.FieldStorageClass or self.__class__ - part = klass(self.fp, {}, ib, - environ, keep_blank_values, strict_parsing) - # Throw first part away - while not part.done: - headers = rfc822.Message(self.fp) - part = klass(self.fp, headers, ib, - environ, keep_blank_values, strict_parsing) - self.list.append(part) - self.skip_lines() - - def read_single(self): - """Internal: read an atomic part.""" - if self.length >= 0: - self.read_binary() - self.skip_lines() - else: - self.read_lines() - self.file.seek(0) - - bufsize = 8*1024 # I/O buffering size for copy to file - - def read_binary(self): - """Internal: read binary data.""" - self.file = self.make_file('b') - todo = self.length - if todo >= 0: - while todo > 0: - data = self.fp.read(min(todo, self.bufsize)) - if not data: - self.done = -1 - break - self.file.write(data) - todo = todo - len(data) - - def read_lines(self): - """Internal: read lines until EOF or outerboundary.""" - self.file = self.__file = StringIO() - if self.outerboundary: - self.read_lines_to_outerboundary() - else: - self.read_lines_to_eof() - - def __write(self, line): - if self.__file is not None: - if self.__file.tell() + len(line) > 1000: - self.file = self.make_file('') - 
self.file.write(self.__file.getvalue()) - self.__file = None - self.file.write(line) - - def read_lines_to_eof(self): - """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline(1<<16) - if not line: - self.done = -1 - break - self.__write(line) - - def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary.""" - next = "--" + self.outerboundary - last = next + "--" - delim = "" - last_line_lfend = True - while 1: - line = self.fp.readline(1<<16) - if not line: - self.done = -1 - break - if delim == "\r": - line = delim + line - delim = "" - if line[:2] == "--" and last_line_lfend: - strippedline = line.strip() - if strippedline == next: - break - if strippedline == last: - self.done = 1 - break - odelim = delim - if line[-2:] == "\r\n": - delim = "\r\n" - line = line[:-2] - last_line_lfend = True - elif line[-1] == "\n": - delim = "\n" - line = line[:-1] - last_line_lfend = True - elif line[-1] == "\r": - # We may interrupt \r\n sequences if they span the 2**16 - # byte boundary - delim = "\r" - line = line[:-1] - last_line_lfend = False - else: - delim = "" - last_line_lfend = False - self.__write(odelim + line) - - def skip_lines(self): - """Internal: skip lines until outer boundary if defined.""" - if not self.outerboundary or self.done: - return - next = "--" + self.outerboundary - last = next + "--" - last_line_lfend = True - while 1: - line = self.fp.readline(1<<16) - if not line: - self.done = -1 - break - if line[:2] == "--" and last_line_lfend: - strippedline = line.strip() - if strippedline == next: - break - if strippedline == last: - self.done = 1 - break - last_line_lfend = line.endswith('\n') - - def make_file(self, binary=None): - """Overridable: return a readable & writable file. - - The file will be used as follows: - - data is written to it - - seek(0) - - data is read from it - - The 'binary' argument is unused -- the file is always opened - in binary mode. 
- - This version opens a temporary file for reading and writing, - and immediately deletes (unlinks) it. The trick (on Unix!) is - that the file can still be used, but it can't be opened by - another process, and it will automatically be deleted when it - is closed or when the current process terminates. - - If you want a more permanent file, you derive a class which - overrides this method. If you want a visible temporary file - that is nevertheless automatically deleted when the script - terminates, try defining a __del__ method in a derived class - which unlinks the temporary files you have created. - - """ - import tempfile - return tempfile.TemporaryFile("w+b") - - - -# Backwards Compatibility Classes -# =============================== - -class FormContentDict(UserDict.UserDict): - """Form content as dictionary with a list of values per field. - - form = FormContentDict() - - form[key] -> [value, value, ...] - key in form -> Boolean - form.keys() -> [key, key, ...] - form.values() -> [[val, val, ...], [val, val, ...], ...] - form.items() -> [(key, [val, val, ...]), (key, [val, val, ...]), ...] - form.dict == {key: [val, val, ...], ...} - - """ - def __init__(self, environ=os.environ, keep_blank_values=0, strict_parsing=0): - self.dict = self.data = parse(environ=environ, - keep_blank_values=keep_blank_values, - strict_parsing=strict_parsing) - self.query_string = environ['QUERY_STRING'] - - -class SvFormContentDict(FormContentDict): - """Form content as dictionary expecting a single value per field. - - If you only expect a single value for each field, then form[key] - will return that single value. It will raise an IndexError if - that expectation is not true. If you expect a field to have - possible multiple values, than you can use form.getlist(key) to - get all of the values. values() and items() are a compromise: - they return single strings where there is a single value, and - lists of strings otherwise. 
- - """ - def __getitem__(self, key): - if len(self.dict[key]) > 1: - raise IndexError, 'expecting a single value' - return self.dict[key][0] - def getlist(self, key): - return self.dict[key] - def values(self): - result = [] - for value in self.dict.values(): - if len(value) == 1: - result.append(value[0]) - else: result.append(value) - return result - def items(self): - result = [] - for key, value in self.dict.items(): - if len(value) == 1: - result.append((key, value[0])) - else: result.append((key, value)) - return result - - -class InterpFormContentDict(SvFormContentDict): - """This class is present for backwards compatibility only.""" - def __getitem__(self, key): - v = SvFormContentDict.__getitem__(self, key) - if v[0] in '0123456789+-.': - try: return int(v) - except ValueError: - try: return float(v) - except ValueError: pass - return v.strip() - def values(self): - result = [] - for key in self.keys(): - try: - result.append(self[key]) - except IndexError: - result.append(self.dict[key]) - return result - def items(self): - result = [] - for key in self.keys(): - try: - result.append((key, self[key])) - except IndexError: - result.append((key, self.dict[key])) - return result - - -class FormContent(FormContentDict): - """This class is present for backwards compatibility only.""" - def values(self, key): - if key in self.dict :return self.dict[key] - else: return None - def indexed_value(self, key, location): - if key in self.dict: - if len(self.dict[key]) > location: - return self.dict[key][location] - else: return None - else: return None - def value(self, key): - if key in self.dict: return self.dict[key][0] - else: return None - def length(self, key): - return len(self.dict[key]) - def stripped(self, key): - if key in self.dict: return self.dict[key][0].strip() - else: return None - def pars(self): - return self.dict - - -# Test/debug code -# =============== - -def test(environ=os.environ): - """Robust test CGI script, usable as main program. 
- - Write minimal HTTP headers and dump all information provided to - the script in HTML form. - - """ - print "Content-type: text/html" - print - sys.stderr = sys.stdout - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - print_environ_usage() - def f(): - exec "testing print_exception() -- italics?" - def g(f=f): - f() - print "

What follows is a test, not an actual exception:

" - g() - except: - print_exception() - - print "

Second try with a small maxlen...

" - - global maxlen - maxlen = 50 - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - except: - print_exception() - -def print_exception(type=None, value=None, tb=None, limit=None): - if type is None: - type, value, tb = sys.exc_info() - import traceback - print - print "

Traceback (most recent call last):

" - list = traceback.format_tb(tb, limit) + \ - traceback.format_exception_only(type, value) - print "
%s%s
" % ( - escape("".join(list[:-1])), - escape(list[-1]), - ) - del tb - -def print_environ(environ=os.environ): - """Dump the shell environment as HTML.""" - keys = environ.keys() - keys.sort() - print - print "

Shell Environment:

" - print "
" - for key in keys: - print "
", escape(key), "
", escape(environ[key]) - print "
" - print - -def print_form(form): - """Dump the contents of a form as HTML.""" - keys = form.keys() - keys.sort() - print - print "

Form Contents:

" - if not keys: - print "

No form fields." - print "

" - for key in keys: - print "
" + escape(key) + ":", - value = form[key] - print "" + escape(repr(type(value))) + "" - print "
" + escape(repr(value)) - print "
" - print - -def print_directory(): - """Dump the current directory as HTML.""" - print - print "

Current Working Directory:

" - try: - pwd = os.getcwd() - except os.error, msg: - print "os.error:", escape(str(msg)) - else: - print escape(pwd) - print - -def print_arguments(): - print - print "

Command Line Arguments:

" - print - print sys.argv - print - -def print_environ_usage(): - """Dump a list of environment variables used by CGI as HTML.""" - print """ -

These environment variables could have been set:

-
    -
  • AUTH_TYPE -
  • CONTENT_LENGTH -
  • CONTENT_TYPE -
  • DATE_GMT -
  • DATE_LOCAL -
  • DOCUMENT_NAME -
  • DOCUMENT_ROOT -
  • DOCUMENT_URI -
  • GATEWAY_INTERFACE -
  • LAST_MODIFIED -
  • PATH -
  • PATH_INFO -
  • PATH_TRANSLATED -
  • QUERY_STRING -
  • REMOTE_ADDR -
  • REMOTE_HOST -
  • REMOTE_IDENT -
  • REMOTE_USER -
  • REQUEST_METHOD -
  • SCRIPT_NAME -
  • SERVER_NAME -
  • SERVER_PORT -
  • SERVER_PROTOCOL -
  • SERVER_ROOT -
  • SERVER_SOFTWARE -
-In addition, HTTP headers sent by the server may be passed in the -environment as well. Here are some common variable names: -
    -
  • HTTP_ACCEPT -
  • HTTP_CONNECTION -
  • HTTP_HOST -
  • HTTP_PRAGMA -
  • HTTP_REFERER -
  • HTTP_USER_AGENT -
-""" - - -# Utilities -# ========= - -def escape(s, quote=None): - '''Replace special characters "&", "<" and ">" to HTML-safe sequences. - If the optional flag quote is true, the quotation mark character (") - is also translated.''' - s = s.replace("&", "&") # Must be done first! - s = s.replace("<", "<") - s = s.replace(">", ">") - if quote: - s = s.replace('"', """) - return s - -def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"): - import re - return re.match(_vb_pattern, s) - -# Invoke mainline -# =============== - -# Call test() when this file is run as a script (not imported as a module) -if __name__ == '__main__': - test() diff --git a/python/Lib/cgitb.py b/python/Lib/cgitb.py deleted file mode 100755 index 8acc4b75fe..0000000000 --- a/python/Lib/cgitb.py +++ /dev/null @@ -1,323 +0,0 @@ -"""More comprehensive traceback formatting for Python scripts. - -To enable this module, do: - - import cgitb; cgitb.enable() - -at the top of your script. The optional arguments to enable() are: - - display - if true, tracebacks are displayed in the web browser - logdir - if set, tracebacks are written to files in this directory - context - number of lines of source code to show for each stack frame - format - 'text' or 'html' controls the output format - -By default, tracebacks are displayed but not saved, the context is 5 lines -and the output format is 'html' (for backwards compatibility with the -original use of this module) - -Alternatively, if you have caught an exception and want cgitb to display it -for you, call cgitb.handler(). The optional argument to handler() is a -3-item tuple (etype, evalue, etb) just like the value of sys.exc_info(). -The default handler displays output as HTML. 
- -""" -import inspect -import keyword -import linecache -import os -import pydoc -import sys -import tempfile -import time -import tokenize -import traceback -import types - -def reset(): - """Return a string that resets the CGI and browser to a known state.""" - return ''' - --> --> - - ''' - -__UNDEF__ = [] # a special sentinel object -def small(text): - if text: - return '' + text + '' - else: - return '' - -def strong(text): - if text: - return '' + text + '' - else: - return '' - -def grey(text): - if text: - return '' + text + '' - else: - return '' - -def lookup(name, frame, locals): - """Find the value for a given name in the given environment.""" - if name in locals: - return 'local', locals[name] - if name in frame.f_globals: - return 'global', frame.f_globals[name] - if '__builtins__' in frame.f_globals: - builtins = frame.f_globals['__builtins__'] - if type(builtins) is type({}): - if name in builtins: - return 'builtin', builtins[name] - else: - if hasattr(builtins, name): - return 'builtin', getattr(builtins, name) - return None, __UNDEF__ - -def scanvars(reader, frame, locals): - """Scan one logical line of Python and look up values of variables used.""" - vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__ - for ttype, token, start, end, line in tokenize.generate_tokens(reader): - if ttype == tokenize.NEWLINE: break - if ttype == tokenize.NAME and token not in keyword.kwlist: - if lasttoken == '.': - if parent is not __UNDEF__: - value = getattr(parent, token, __UNDEF__) - vars.append((prefix + token, prefix, value)) - else: - where, value = lookup(token, frame, locals) - vars.append((token, where, value)) - elif token == '.': - prefix += lasttoken + '.' 
- parent = value - else: - parent, prefix = None, '' - lasttoken = token - return vars - -def html(einfo, context=5): - """Return a nice HTML document describing a given traceback.""" - etype, evalue, etb = einfo - if type(etype) is types.ClassType: - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = '' + pydoc.html.heading( - '%s' % - strong(pydoc.html.escape(str(etype))), - '#ffffff', '#6622aa', pyver + '
' + date) + ''' -

A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred.

''' - - indent = '' + small(' ' * 5) + ' ' - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - if file: - file = os.path.abspath(file) - link = '%s' % (file, pydoc.html.escape(file)) - else: - file = link = '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + strong(func) + \ - inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.html.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = ['%s%s %s' % - (' ', link, call)] - if index is not None: - i = lnum - index - for line in lines: - num = small(' ' * (5-len(str(i))) + str(i)) + ' ' - if i in highlight: - line = '=>%s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % line) - else: - line = '  %s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % grey(line)) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where in ('global', 'builtin'): - name = ('%s ' % where) + strong(name) - elif where == 'local': - name = strong(name) - else: - name = where + strong(name.split('.')[-1]) - dump.append('%s = %s' % (name, pydoc.html.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('%s' % small(grey(', '.join(dump)))) - frames.append(''' - -%s
''' % '\n'.join(rows)) - - exception = ['

%s: %s' % (strong(pydoc.html.escape(str(etype))), - pydoc.html.escape(str(evalue)))] - if isinstance(evalue, BaseException): - for name in dir(evalue): - if name[:1] == '_': continue - value = pydoc.html.repr(getattr(evalue, name)) - exception.append('\n
%s%s =\n%s' % (indent, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - - - -''' % pydoc.html.escape( - ''.join(traceback.format_exception(etype, evalue, etb))) - -def text(einfo, context=5): - """Return a plain text document describing a given traceback.""" - etype, evalue, etb = einfo - if type(etype) is types.ClassType: - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + ''' -A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred. -''' - - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - file = file and os.path.abspath(file) or '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + func + \ - inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.text.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = [' %s %s' % (file, call)] - if index is not None: - i = lnum - index - for line in lines: - num = '%5d ' % i - rows.append(num+line.rstrip()) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where == 'global': name = 'global ' + name - elif where != 'local': name = where + name.split('.')[-1] - dump.append('%s = %s' % (name, pydoc.text.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('\n'.join(dump)) - frames.append('\n%s\n' % '\n'.join(rows)) - - exception = ['%s: %s' % (str(etype), str(evalue))] - if isinstance(evalue, BaseException): - for name in dir(evalue): - value = 
pydoc.text.repr(getattr(evalue, name)) - exception.append('\n%s%s = %s' % (" "*4, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - -The above is a description of an error in a Python program. Here is -the original traceback: - -%s -''' % ''.join(traceback.format_exception(etype, evalue, etb)) - -class Hook: - """A hook to replace sys.excepthook that shows tracebacks in HTML.""" - - def __init__(self, display=1, logdir=None, context=5, file=None, - format="html"): - self.display = display # send tracebacks to browser if true - self.logdir = logdir # log tracebacks to files if not None - self.context = context # number of source code lines per frame - self.file = file or sys.stdout # place to send the output - self.format = format - - def __call__(self, etype, evalue, etb): - self.handle((etype, evalue, etb)) - - def handle(self, info=None): - info = info or sys.exc_info() - if self.format == "html": - self.file.write(reset()) - - formatter = (self.format=="html") and html or text - plain = False - try: - doc = formatter(info, self.context) - except: # just in case something goes wrong - doc = ''.join(traceback.format_exception(*info)) - plain = True - - if self.display: - if plain: - doc = doc.replace('&', '&').replace('<', '<') - self.file.write('

' + doc + '
\n') - else: - self.file.write(doc + '\n') - else: - self.file.write('

A problem occurred in a Python script.\n') - - if self.logdir is not None: - suffix = ['.txt', '.html'][self.format=="html"] - (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir) - - try: - file = os.fdopen(fd, 'w') - file.write(doc) - file.close() - msg = '%s contains the description of this error.' % path - except: - msg = 'Tried to save traceback to %s, but failed.' % path - - if self.format == 'html': - self.file.write('

%s

\n' % msg) - else: - self.file.write(msg + '\n') - try: - self.file.flush() - except: pass - -handler = Hook().handle -def enable(display=1, logdir=None, context=5, format="html"): - """Install an exception handler that formats tracebacks as HTML. - - The optional argument 'display' can be set to 0 to suppress sending the - traceback to the browser, and 'logdir' can be set to a directory to cause - tracebacks to be written to files there.""" - sys.excepthook = Hook(display=display, logdir=logdir, - context=context, format=format) diff --git a/python/Lib/chunk.py b/python/Lib/chunk.py deleted file mode 100755 index d2020cc7a7..0000000000 --- a/python/Lib/chunk.py +++ /dev/null @@ -1,169 +0,0 @@ -"""Simple class to read IFF chunks. - -An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File -Format)) has the following structure: - -+----------------+ -| ID (4 bytes) | -+----------------+ -| size (4 bytes) | -+----------------+ -| data | -| ... | -+----------------+ - -The ID is a 4-byte string which identifies the type of chunk. - -The size field (a 32-bit value, encoded using big-endian byte order) -gives the size of the whole chunk, including the 8-byte header. - -Usually an IFF-type file consists of one or more chunks. The proposed -usage of the Chunk class defined here is to instantiate an instance at -the start of each chunk and read from the instance until it reaches -the end, after which a new instance can be instantiated. At the end -of the file, creating a new instance will fail with an EOFError -exception. - -Usage: -while True: - try: - chunk = Chunk(file) - except EOFError: - break - chunktype = chunk.getname() - while True: - data = chunk.read(nbytes) - if not data: - pass - # do something with data - -The interface is file-like. The implemented methods are: -read, close, seek, tell, isatty. 
-Extra methods are: skip() (called by close, skips to the end of the chunk), -getname() (returns the name (ID) of the chunk) - -The __init__ method has one required argument, a file-like object -(including a chunk instance), and one optional argument, a flag which -specifies whether or not chunks are aligned on 2-byte boundaries. The -default is 1, i.e. aligned. -""" - -class Chunk: - def __init__(self, file, align=True, bigendian=True, inclheader=False): - import struct - self.closed = False - self.align = align # whether to align to word (2-byte) boundaries - if bigendian: - strflag = '>' - else: - strflag = '<' - self.file = file - self.chunkname = file.read(4) - if len(self.chunkname) < 4: - raise EOFError - try: - self.chunksize = struct.unpack(strflag+'L', file.read(4))[0] - except struct.error: - raise EOFError - if inclheader: - self.chunksize = self.chunksize - 8 # subtract header - self.size_read = 0 - try: - self.offset = self.file.tell() - except (AttributeError, IOError): - self.seekable = False - else: - self.seekable = True - - def getname(self): - """Return the name (ID) of the current chunk.""" - return self.chunkname - - def getsize(self): - """Return the size of the current chunk.""" - return self.chunksize - - def close(self): - if not self.closed: - try: - self.skip() - finally: - self.closed = True - - def isatty(self): - if self.closed: - raise ValueError, "I/O operation on closed file" - return False - - def seek(self, pos, whence=0): - """Seek to specified position into the chunk. - Default position is 0 (start of chunk). - If the file is not seekable, this will result in an error. 
- """ - - if self.closed: - raise ValueError, "I/O operation on closed file" - if not self.seekable: - raise IOError, "cannot seek" - if whence == 1: - pos = pos + self.size_read - elif whence == 2: - pos = pos + self.chunksize - if pos < 0 or pos > self.chunksize: - raise RuntimeError - self.file.seek(self.offset + pos, 0) - self.size_read = pos - - def tell(self): - if self.closed: - raise ValueError, "I/O operation on closed file" - return self.size_read - - def read(self, size=-1): - """Read at most size bytes from the chunk. - If size is omitted or negative, read until the end - of the chunk. - """ - - if self.closed: - raise ValueError, "I/O operation on closed file" - if self.size_read >= self.chunksize: - return '' - if size < 0: - size = self.chunksize - self.size_read - if size > self.chunksize - self.size_read: - size = self.chunksize - self.size_read - data = self.file.read(size) - self.size_read = self.size_read + len(data) - if self.size_read == self.chunksize and \ - self.align and \ - (self.chunksize & 1): - dummy = self.file.read(1) - self.size_read = self.size_read + len(dummy) - return data - - def skip(self): - """Skip the rest of the chunk. - If you are not interested in the contents of the chunk, - this method should be called so that the file points to - the start of the next chunk. 
- """ - - if self.closed: - raise ValueError, "I/O operation on closed file" - if self.seekable: - try: - n = self.chunksize - self.size_read - # maybe fix alignment - if self.align and (self.chunksize & 1): - n = n + 1 - self.file.seek(n, 1) - self.size_read = self.size_read + n - return - except IOError: - pass - while self.size_read < self.chunksize: - n = min(8192, self.chunksize - self.size_read) - dummy = self.read(n) - if not dummy: - raise EOFError diff --git a/python/Lib/cmd.py b/python/Lib/cmd.py deleted file mode 100755 index 05ba7e3bc6..0000000000 --- a/python/Lib/cmd.py +++ /dev/null @@ -1,404 +0,0 @@ -"""A generic class to build line-oriented command interpreters. - -Interpreters constructed with this class obey the following conventions: - -1. End of file on input is processed as the command 'EOF'. -2. A command is parsed out of each line by collecting the prefix composed - of characters in the identchars member. -3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method - is passed a single argument consisting of the remainder of the line. -4. Typing an empty line repeats the last command. (Actually, it calls the - method `emptyline', which may be overridden in a subclass.) -5. There is a predefined `help' method. Given an argument `topic', it - calls the command `help_topic'. With no arguments, it lists all topics - with defined help_ functions, broken into up to three topics; documented - commands, miscellaneous help topics, and undocumented commands. -6. The command '?' is a synonym for `help'. The command '!' is a synonym - for `shell', if a do_shell method exists. -7. If completion is enabled, completing commands will be done automatically, - and completing of commands args is done by calling complete_foo() with - arguments text, line, begidx, endidx. text is string we are matching - against, all returned matches must begin with it. 
line is the current - input line (lstripped), begidx and endidx are the beginning and end - indexes of the text being matched, which could be used to provide - different completion depending upon which position the argument is in. - -The `default' method may be overridden to intercept commands for which there -is no do_ method. - -The `completedefault' method may be overridden to intercept completions for -commands that have no complete_ method. - -The data member `self.ruler' sets the character used to draw separator lines -in the help messages. If empty, no ruler line is drawn. It defaults to "=". - -If the value of `self.intro' is nonempty when the cmdloop method is called, -it is printed out on interpreter startup. This value may be overridden -via an optional argument to the cmdloop() method. - -The data members `self.doc_header', `self.misc_header', and -`self.undoc_header' set the headers used for the help function's -listings of documented functions, miscellaneous topics, and undocumented -functions respectively. - -These interpreters use raw_input; thus, if the readline module is loaded, -they automatically support Emacs-like command history and editing features. -""" - -import string - -__all__ = ["Cmd"] - -PROMPT = '(Cmd) ' -IDENTCHARS = string.ascii_letters + string.digits + '_' - -class Cmd: - """A simple framework for writing line-oriented command interpreters. - - These are often useful for test harnesses, administrative tools, and - prototypes that will later be wrapped in a more sophisticated interface. - - A Cmd instance or subclass instance is a line-oriented interpreter - framework. There is no good reason to instantiate Cmd itself; rather, - it's useful as a superclass of an interpreter class you define yourself - in order to inherit Cmd's methods and encapsulate action methods. 
- - """ - prompt = PROMPT - identchars = IDENTCHARS - ruler = '=' - lastcmd = '' - intro = None - doc_leader = "" - doc_header = "Documented commands (type help ):" - misc_header = "Miscellaneous help topics:" - undoc_header = "Undocumented commands:" - nohelp = "*** No help on %s" - use_rawinput = 1 - - def __init__(self, completekey='tab', stdin=None, stdout=None): - """Instantiate a line-oriented interpreter framework. - - The optional argument 'completekey' is the readline name of a - completion key; it defaults to the Tab key. If completekey is - not None and the readline module is available, command completion - is done automatically. The optional arguments stdin and stdout - specify alternate input and output file objects; if not specified, - sys.stdin and sys.stdout are used. - - """ - import sys - if stdin is not None: - self.stdin = stdin - else: - self.stdin = sys.stdin - if stdout is not None: - self.stdout = stdout - else: - self.stdout = sys.stdout - self.cmdqueue = [] - self.completekey = completekey - - def cmdloop(self, intro=None): - """Repeatedly issue a prompt, accept input, parse an initial prefix - off the received input, and dispatch to action methods, passing them - the remainder of the line as argument. 
- - """ - - self.preloop() - if self.use_rawinput and self.completekey: - try: - import readline - self.old_completer = readline.get_completer() - readline.set_completer(self.complete) - readline.parse_and_bind(self.completekey+": complete") - except ImportError: - pass - try: - if intro is not None: - self.intro = intro - if self.intro: - self.stdout.write(str(self.intro)+"\n") - stop = None - while not stop: - if self.cmdqueue: - line = self.cmdqueue.pop(0) - else: - if self.use_rawinput: - try: - line = raw_input(self.prompt) - except EOFError: - line = 'EOF' - else: - self.stdout.write(self.prompt) - self.stdout.flush() - line = self.stdin.readline() - if not len(line): - line = 'EOF' - else: - line = line.rstrip('\r\n') - line = self.precmd(line) - stop = self.onecmd(line) - stop = self.postcmd(stop, line) - self.postloop() - finally: - if self.use_rawinput and self.completekey: - try: - import readline - readline.set_completer(self.old_completer) - except ImportError: - pass - - - def precmd(self, line): - """Hook method executed just before the command line is - interpreted, but after the input prompt is generated and issued. - - """ - return line - - def postcmd(self, stop, line): - """Hook method executed just after a command dispatch is finished.""" - return stop - - def preloop(self): - """Hook method executed once when the cmdloop() method is called.""" - pass - - def postloop(self): - """Hook method executed once when the cmdloop() method is about to - return. - - """ - pass - - def parseline(self, line): - """Parse the line into a command name and a string containing - the arguments. Returns a tuple containing (command, args, line). - 'command' and 'args' may be None if the line couldn't be parsed. 
- """ - line = line.strip() - if not line: - return None, None, line - elif line[0] == '?': - line = 'help ' + line[1:] - elif line[0] == '!': - if hasattr(self, 'do_shell'): - line = 'shell ' + line[1:] - else: - return None, None, line - i, n = 0, len(line) - while i < n and line[i] in self.identchars: i = i+1 - cmd, arg = line[:i], line[i:].strip() - return cmd, arg, line - - def onecmd(self, line): - """Interpret the argument as though it had been typed in response - to the prompt. - - This may be overridden, but should not normally need to be; - see the precmd() and postcmd() methods for useful execution hooks. - The return value is a flag indicating whether interpretation of - commands by the interpreter should stop. - - """ - cmd, arg, line = self.parseline(line) - if not line: - return self.emptyline() - if cmd is None: - return self.default(line) - self.lastcmd = line - if line == 'EOF' : - self.lastcmd = '' - if cmd == '': - return self.default(line) - else: - try: - func = getattr(self, 'do_' + cmd) - except AttributeError: - return self.default(line) - return func(arg) - - def emptyline(self): - """Called when an empty line is entered in response to the prompt. - - If this method is not overridden, it repeats the last nonempty - command entered. - - """ - if self.lastcmd: - return self.onecmd(self.lastcmd) - - def default(self, line): - """Called on an input line when the command prefix is not recognized. - - If this method is not overridden, it prints an error message and - returns. - - """ - self.stdout.write('*** Unknown syntax: %s\n'%line) - - def completedefault(self, *ignored): - """Method called to complete an input line when no command-specific - complete_*() method is available. - - By default, it returns an empty list. 
- - """ - return [] - - def completenames(self, text, *ignored): - dotext = 'do_'+text - return [a[3:] for a in self.get_names() if a.startswith(dotext)] - - def complete(self, text, state): - """Return the next possible completion for 'text'. - - If a command has not been entered, then complete against command list. - Otherwise try to call complete_ to get list of completions. - """ - if state == 0: - import readline - origline = readline.get_line_buffer() - line = origline.lstrip() - stripped = len(origline) - len(line) - begidx = readline.get_begidx() - stripped - endidx = readline.get_endidx() - stripped - if begidx>0: - cmd, args, foo = self.parseline(line) - if cmd == '': - compfunc = self.completedefault - else: - try: - compfunc = getattr(self, 'complete_' + cmd) - except AttributeError: - compfunc = self.completedefault - else: - compfunc = self.completenames - self.completion_matches = compfunc(text, line, begidx, endidx) - try: - return self.completion_matches[state] - except IndexError: - return None - - def get_names(self): - # This method used to pull in base class attributes - # at a time dir() didn't do it yet. - return dir(self.__class__) - - def complete_help(self, *args): - commands = set(self.completenames(*args)) - topics = set(a[5:] for a in self.get_names() - if a.startswith('help_' + args[0])) - return list(commands | topics) - - def do_help(self, arg): - 'List available commands with "help" or detailed help with "help cmd".' 
- if arg: - # XXX check arg syntax - try: - func = getattr(self, 'help_' + arg) - except AttributeError: - try: - doc=getattr(self, 'do_' + arg).__doc__ - if doc: - self.stdout.write("%s\n"%str(doc)) - return - except AttributeError: - pass - self.stdout.write("%s\n"%str(self.nohelp % (arg,))) - return - func() - else: - names = self.get_names() - cmds_doc = [] - cmds_undoc = [] - help = {} - for name in names: - if name[:5] == 'help_': - help[name[5:]]=1 - names.sort() - # There can be duplicates if routines overridden - prevname = '' - for name in names: - if name[:3] == 'do_': - if name == prevname: - continue - prevname = name - cmd=name[3:] - if cmd in help: - cmds_doc.append(cmd) - del help[cmd] - elif getattr(self, name).__doc__: - cmds_doc.append(cmd) - else: - cmds_undoc.append(cmd) - self.stdout.write("%s\n"%str(self.doc_leader)) - self.print_topics(self.doc_header, cmds_doc, 15,80) - self.print_topics(self.misc_header, help.keys(),15,80) - self.print_topics(self.undoc_header, cmds_undoc, 15,80) - - def print_topics(self, header, cmds, cmdlen, maxcol): - if cmds: - self.stdout.write("%s\n"%str(header)) - if self.ruler: - self.stdout.write("%s\n"%str(self.ruler * len(header))) - self.columnize(cmds, maxcol-1) - self.stdout.write("\n") - - def columnize(self, list, displaywidth=80): - """Display a list of strings as a compact set of columns. - - Each column is only as wide as necessary. - Columns are separated by two spaces (one was not legible enough). 
- """ - if not list: - self.stdout.write("\n") - return - nonstrings = [i for i in range(len(list)) - if not isinstance(list[i], str)] - if nonstrings: - raise TypeError, ("list[i] not a string for i in %s" % - ", ".join(map(str, nonstrings))) - size = len(list) - if size == 1: - self.stdout.write('%s\n'%str(list[0])) - return - # Try every row count from 1 upwards - for nrows in range(1, len(list)): - ncols = (size+nrows-1) // nrows - colwidths = [] - totwidth = -2 - for col in range(ncols): - colwidth = 0 - for row in range(nrows): - i = row + nrows*col - if i >= size: - break - x = list[i] - colwidth = max(colwidth, len(x)) - colwidths.append(colwidth) - totwidth += colwidth + 2 - if totwidth > displaywidth: - break - if totwidth <= displaywidth: - break - else: - nrows = len(list) - ncols = 1 - colwidths = [0] - for row in range(nrows): - texts = [] - for col in range(ncols): - i = row + nrows*col - if i >= size: - x = "" - else: - x = list[i] - texts.append(x) - while texts and not texts[-1]: - del texts[-1] - for col in range(len(texts)): - texts[col] = texts[col].ljust(colwidths[col]) - self.stdout.write("%s\n"%str(" ".join(texts))) diff --git a/python/Lib/code.py b/python/Lib/code.py deleted file mode 100755 index 3b39d1b346..0000000000 --- a/python/Lib/code.py +++ /dev/null @@ -1,310 +0,0 @@ -"""Utilities needed to emulate Python's interactive interpreter. - -""" - -# Inspired by similar code by Jeff Epler and Fredrik Lundh. - - -import sys -import traceback -from codeop import CommandCompiler, compile_command - -__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact", - "compile_command"] - -def softspace(file, newvalue): - oldvalue = 0 - try: - oldvalue = file.softspace - except AttributeError: - pass - try: - file.softspace = newvalue - except (AttributeError, TypeError): - # "attribute-less object" or "read-only attributes" - pass - return oldvalue - -class InteractiveInterpreter: - """Base class for InteractiveConsole. 
- - This class deals with parsing and interpreter state (the user's - namespace); it doesn't deal with input buffering or prompting or - input file naming (the filename is always passed in explicitly). - - """ - - def __init__(self, locals=None): - """Constructor. - - The optional 'locals' argument specifies the dictionary in - which code will be executed; it defaults to a newly created - dictionary with key "__name__" set to "__console__" and key - "__doc__" set to None. - - """ - if locals is None: - locals = {"__name__": "__console__", "__doc__": None} - self.locals = locals - self.compile = CommandCompiler() - - def runsource(self, source, filename="", symbol="single"): - """Compile and run some source in the interpreter. - - Arguments are as for compile_command(). - - One several things can happen: - - 1) The input is incorrect; compile_command() raised an - exception (SyntaxError or OverflowError). A syntax traceback - will be printed by calling the showsyntaxerror() method. - - 2) The input is incomplete, and more input is required; - compile_command() returned None. Nothing happens. - - 3) The input is complete; compile_command() returned a code - object. The code is executed by calling self.runcode() (which - also handles run-time exceptions, except for SystemExit). - - The return value is True in case 2, False in the other cases (unless - an exception is raised). The return value can be used to - decide whether to use sys.ps1 or sys.ps2 to prompt the next - line. - - """ - try: - code = self.compile(source, filename, symbol) - except (OverflowError, SyntaxError, ValueError): - # Case 1 - self.showsyntaxerror(filename) - return False - - if code is None: - # Case 2 - return True - - # Case 3 - self.runcode(code) - return False - - def runcode(self, code): - """Execute a code object. - - When an exception occurs, self.showtraceback() is called to - display a traceback. All exceptions are caught except - SystemExit, which is reraised. 
- - A note about KeyboardInterrupt: this exception may occur - elsewhere in this code, and may not always be caught. The - caller should be prepared to deal with it. - - """ - try: - exec code in self.locals - except SystemExit: - raise - except: - self.showtraceback() - else: - if softspace(sys.stdout, 0): - print - - def showsyntaxerror(self, filename=None): - """Display the syntax error that just occurred. - - This doesn't display a stack trace because there isn't one. - - If a filename is given, it is stuffed in the exception instead - of what was there before (because Python's parser always uses - "" when reading from a string). - - The output is written by self.write(), below. - - """ - type, value, sys.last_traceback = sys.exc_info() - sys.last_type = type - sys.last_value = value - if filename and type is SyntaxError: - # Work hard to stuff the correct filename in the exception - try: - msg, (dummy_filename, lineno, offset, line) = value - except: - # Not the format we expect; leave it alone - pass - else: - # Stuff in the right filename - value = SyntaxError(msg, (filename, lineno, offset, line)) - sys.last_value = value - list = traceback.format_exception_only(type, value) - map(self.write, list) - - def showtraceback(self): - """Display the exception that just occurred. - - We remove the first stack item because it is our own code. - - The output is written by self.write(), below. - - """ - try: - type, value, tb = sys.exc_info() - sys.last_type = type - sys.last_value = value - sys.last_traceback = tb - tblist = traceback.extract_tb(tb) - del tblist[:1] - list = traceback.format_list(tblist) - if list: - list.insert(0, "Traceback (most recent call last):\n") - list[len(list):] = traceback.format_exception_only(type, value) - finally: - tblist = tb = None - map(self.write, list) - - def write(self, data): - """Write a string. - - The base implementation writes to sys.stderr; a subclass may - replace this with a different implementation. 
- - """ - sys.stderr.write(data) - - -class InteractiveConsole(InteractiveInterpreter): - """Closely emulate the behavior of the interactive Python interpreter. - - This class builds on InteractiveInterpreter and adds prompting - using the familiar sys.ps1 and sys.ps2, and input buffering. - - """ - - def __init__(self, locals=None, filename=""): - """Constructor. - - The optional locals argument will be passed to the - InteractiveInterpreter base class. - - The optional filename argument should specify the (file)name - of the input stream; it will show up in tracebacks. - - """ - InteractiveInterpreter.__init__(self, locals) - self.filename = filename - self.resetbuffer() - - def resetbuffer(self): - """Reset the input buffer.""" - self.buffer = [] - - def interact(self, banner=None): - """Closely emulate the interactive Python console. - - The optional banner argument specify the banner to print - before the first interaction; by default it prints a banner - similar to the one printed by the real Python interpreter, - followed by the current class name in parentheses (so as not - to confuse this with the real interpreter -- since it's so - close!). - - """ - try: - sys.ps1 - except AttributeError: - sys.ps1 = ">>> " - try: - sys.ps2 - except AttributeError: - sys.ps2 = "... " - cprt = 'Type "help", "copyright", "credits" or "license" for more information.' 
- if banner is None: - self.write("Python %s on %s\n%s\n(%s)\n" % - (sys.version, sys.platform, cprt, - self.__class__.__name__)) - else: - self.write("%s\n" % str(banner)) - more = 0 - while 1: - try: - if more: - prompt = sys.ps2 - else: - prompt = sys.ps1 - try: - line = self.raw_input(prompt) - # Can be None if sys.stdin was redefined - encoding = getattr(sys.stdin, "encoding", None) - if encoding and not isinstance(line, unicode): - line = line.decode(encoding) - except EOFError: - self.write("\n") - break - else: - more = self.push(line) - except KeyboardInterrupt: - self.write("\nKeyboardInterrupt\n") - self.resetbuffer() - more = 0 - - def push(self, line): - """Push a line to the interpreter. - - The line should not have a trailing newline; it may have - internal newlines. The line is appended to a buffer and the - interpreter's runsource() method is called with the - concatenated contents of the buffer as source. If this - indicates that the command was executed or invalid, the buffer - is reset; otherwise, the command is incomplete, and the buffer - is left as it was after the line was appended. The return - value is 1 if more input is required, 0 if the line was dealt - with in some way (this is the same as runsource()). - - """ - self.buffer.append(line) - source = "\n".join(self.buffer) - more = self.runsource(source, self.filename) - if not more: - self.resetbuffer() - return more - - def raw_input(self, prompt=""): - """Write a prompt and read a line. - - The returned line does not include the trailing newline. - When the user enters the EOF key sequence, EOFError is raised. - - The base implementation uses the built-in function - raw_input(); a subclass may replace this with a different - implementation. - - """ - return raw_input(prompt) - - -def interact(banner=None, readfunc=None, local=None): - """Closely emulate the interactive Python interpreter. - - This is a backwards compatible interface to the InteractiveConsole - class. 
When readfunc is not specified, it attempts to import the - readline module to enable GNU readline if it is available. - - Arguments (all optional, all default to None): - - banner -- passed to InteractiveConsole.interact() - readfunc -- if not None, replaces InteractiveConsole.raw_input() - local -- passed to InteractiveInterpreter.__init__() - - """ - console = InteractiveConsole(local) - if readfunc is not None: - console.raw_input = readfunc - else: - try: - import readline - except ImportError: - pass - console.interact(banner) - - -if __name__ == "__main__": - interact() diff --git a/python/Lib/codecs.py b/python/Lib/codecs.py deleted file mode 100755 index 20357aeb6c..0000000000 --- a/python/Lib/codecs.py +++ /dev/null @@ -1,1113 +0,0 @@ -""" codecs -- Python Codec Registry, API and helpers. - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -"""#" - -import __builtin__, sys - -### Registry and builtin stateless codec functions - -try: - from _codecs import * -except ImportError, why: - raise SystemError('Failed to load the builtin codecs: %s' % why) - -__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE", - "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", - "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE", - "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE", - "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder", - "StreamReader", "StreamWriter", - "StreamReaderWriter", "StreamRecoder", - "getencoder", "getdecoder", "getincrementalencoder", - "getincrementaldecoder", "getreader", "getwriter", - "encode", "decode", "iterencode", "iterdecode", - "strict_errors", "ignore_errors", "replace_errors", - "xmlcharrefreplace_errors", "backslashreplace_errors", - "register_error", "lookup_error"] - -### Constants - -# -# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF) -# and its possible byte string values -# for UTF8/UTF16/UTF32 output and little/big 
endian machines -# - -# UTF-8 -BOM_UTF8 = '\xef\xbb\xbf' - -# UTF-16, little endian -BOM_LE = BOM_UTF16_LE = '\xff\xfe' - -# UTF-16, big endian -BOM_BE = BOM_UTF16_BE = '\xfe\xff' - -# UTF-32, little endian -BOM_UTF32_LE = '\xff\xfe\x00\x00' - -# UTF-32, big endian -BOM_UTF32_BE = '\x00\x00\xfe\xff' - -if sys.byteorder == 'little': - - # UTF-16, native endianness - BOM = BOM_UTF16 = BOM_UTF16_LE - - # UTF-32, native endianness - BOM_UTF32 = BOM_UTF32_LE - -else: - - # UTF-16, native endianness - BOM = BOM_UTF16 = BOM_UTF16_BE - - # UTF-32, native endianness - BOM_UTF32 = BOM_UTF32_BE - -# Old broken names (don't use in new code) -BOM32_LE = BOM_UTF16_LE -BOM32_BE = BOM_UTF16_BE -BOM64_LE = BOM_UTF32_LE -BOM64_BE = BOM_UTF32_BE - - -### Codec base classes (defining the API) - -class CodecInfo(tuple): - """Codec details when looking up the codec registry""" - - # Private API to allow Python to blacklist the known non-Unicode - # codecs in the standard library. A more general mechanism to - # reliably distinguish test encodings from other codecs will hopefully - # be defined for Python 3.5 - # - # See http://bugs.python.org/issue19619 - _is_text_encoding = True # Assume codecs are text encodings by default - - def __new__(cls, encode, decode, streamreader=None, streamwriter=None, - incrementalencoder=None, incrementaldecoder=None, name=None, - _is_text_encoding=None): - self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) - self.name = name - self.encode = encode - self.decode = decode - self.incrementalencoder = incrementalencoder - self.incrementaldecoder = incrementaldecoder - self.streamwriter = streamwriter - self.streamreader = streamreader - if _is_text_encoding is not None: - self._is_text_encoding = _is_text_encoding - return self - - def __repr__(self): - return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self)) - -class Codec: - - """ Defines the interface for stateless 
encoders/decoders. - - The .encode()/.decode() methods may use different error - handling schemes by providing the errors argument. These - string values are predefined: - - 'strict' - raise a ValueError error (or a subclass) - 'ignore' - ignore the character and continue with the next - 'replace' - replace with a suitable replacement character; - Python will use the official U+FFFD REPLACEMENT - CHARACTER for the builtin Unicode codecs on - decoding and '?' on encoding. - 'xmlcharrefreplace' - Replace with the appropriate XML - character reference (only for encoding). - 'backslashreplace' - Replace with backslashed escape sequences - (only for encoding). - - The set of allowed values can be extended via register_error. - - """ - def encode(self, input, errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling. - - The method may not store state in the Codec instance. Use - StreamWriter for codecs which have to keep state in order to - make encoding efficient. - - The encoder must be able to handle zero length input and - return an empty object of the output object type in this - situation. - - """ - raise NotImplementedError - - def decode(self, input, errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling. - - The method may not store state in the Codec instance. Use - StreamReader for codecs which have to keep state in order to - make decoding efficient. - - The decoder must be able to handle zero length input and - return an empty object of the output object type in this - situation. 
- - """ - raise NotImplementedError - -class IncrementalEncoder(object): - """ - An IncrementalEncoder encodes an input in multiple steps. The input can be - passed piece by piece to the encode() method. The IncrementalEncoder remembers - the state of the Encoding process between calls to encode(). - """ - def __init__(self, errors='strict'): - """ - Creates an IncrementalEncoder instance. - - The IncrementalEncoder may use different error handling schemes by - providing the errors keyword argument. See the module docstring - for a list of possible values. - """ - self.errors = errors - self.buffer = "" - - def encode(self, input, final=False): - """ - Encodes input and returns the resulting object. - """ - raise NotImplementedError - - def reset(self): - """ - Resets the encoder to the initial state. - """ - - def getstate(self): - """ - Return the current state of the encoder. - """ - return 0 - - def setstate(self, state): - """ - Set the current state of the encoder. state must have been - returned by getstate(). - """ - -class BufferedIncrementalEncoder(IncrementalEncoder): - """ - This subclass of IncrementalEncoder can be used as the baseclass for an - incremental encoder if the encoder must keep some of the output in a - buffer between calls to encode(). 
- """ - def __init__(self, errors='strict'): - IncrementalEncoder.__init__(self, errors) - self.buffer = "" # unencoded input that is kept between calls to encode() - - def _buffer_encode(self, input, errors, final): - # Overwrite this method in subclasses: It must encode input - # and return an (output, length consumed) tuple - raise NotImplementedError - - def encode(self, input, final=False): - # encode input (taking the buffer into account) - data = self.buffer + input - (result, consumed) = self._buffer_encode(data, self.errors, final) - # keep unencoded input until the next call - self.buffer = data[consumed:] - return result - - def reset(self): - IncrementalEncoder.reset(self) - self.buffer = "" - - def getstate(self): - return self.buffer or 0 - - def setstate(self, state): - self.buffer = state or "" - -class IncrementalDecoder(object): - """ - An IncrementalDecoder decodes an input in multiple steps. The input can be - passed piece by piece to the decode() method. The IncrementalDecoder - remembers the state of the decoding process between calls to decode(). - """ - def __init__(self, errors='strict'): - """ - Creates an IncrementalDecoder instance. - - The IncrementalDecoder may use different error handling schemes by - providing the errors keyword argument. See the module docstring - for a list of possible values. - """ - self.errors = errors - - def decode(self, input, final=False): - """ - Decodes input and returns the resulting object. - """ - raise NotImplementedError - - def reset(self): - """ - Resets the decoder to the initial state. - """ - - def getstate(self): - """ - Return the current state of the decoder. - - This must be a (buffered_input, additional_state_info) tuple. - buffered_input must be a bytes object containing bytes that - were passed to decode() that have not yet been converted. 
- additional_state_info must be a non-negative integer - representing the state of the decoder WITHOUT yet having - processed the contents of buffered_input. In the initial state - and after reset(), getstate() must return (b"", 0). - """ - return (b"", 0) - - def setstate(self, state): - """ - Set the current state of the decoder. - - state must have been returned by getstate(). The effect of - setstate((b"", 0)) must be equivalent to reset(). - """ - -class BufferedIncrementalDecoder(IncrementalDecoder): - """ - This subclass of IncrementalDecoder can be used as the baseclass for an - incremental decoder if the decoder must be able to handle incomplete byte - sequences. - """ - def __init__(self, errors='strict'): - IncrementalDecoder.__init__(self, errors) - self.buffer = "" # undecoded input that is kept between calls to decode() - - def _buffer_decode(self, input, errors, final): - # Overwrite this method in subclasses: It must decode input - # and return an (output, length consumed) tuple - raise NotImplementedError - - def decode(self, input, final=False): - # decode input (taking the buffer into account) - data = self.buffer + input - (result, consumed) = self._buffer_decode(data, self.errors, final) - # keep undecoded input until the next call - self.buffer = data[consumed:] - return result - - def reset(self): - IncrementalDecoder.reset(self) - self.buffer = "" - - def getstate(self): - # additional state info is always 0 - return (self.buffer, 0) - - def setstate(self, state): - # ignore additional state info - self.buffer = state[0] - -# -# The StreamWriter and StreamReader class provide generic working -# interfaces which can be used to implement new encoding submodules -# very easily. See encodings/utf_8.py for an example on how this is -# done. -# - -class StreamWriter(Codec): - - def __init__(self, stream, errors='strict'): - - """ Creates a StreamWriter instance. - - stream must be a file-like object open for writing - (binary) data. 
- - The StreamWriter may use different error handling - schemes by providing the errors keyword argument. These - parameters are predefined: - - 'strict' - raise a ValueError (or a subclass) - 'ignore' - ignore the character and continue with the next - 'replace'- replace with a suitable replacement character - 'xmlcharrefreplace' - Replace with the appropriate XML - character reference. - 'backslashreplace' - Replace with backslashed escape - sequences (only for encoding). - - The set of allowed parameter values can be extended via - register_error. - """ - self.stream = stream - self.errors = errors - - def write(self, object): - - """ Writes the object's contents encoded to self.stream. - """ - data, consumed = self.encode(object, self.errors) - self.stream.write(data) - - def writelines(self, list): - - """ Writes the concatenated list of strings to the stream - using .write(). - """ - self.write(''.join(list)) - - def reset(self): - - """ Flushes and resets the codec buffers used for keeping state. - - Calling this method should ensure that the data on the - output is put into a clean state, that allows appending - of new fresh data without having to rescan the whole - stream to recover state. - - """ - pass - - def seek(self, offset, whence=0): - self.stream.seek(offset, whence) - if whence == 0 and offset == 0: - self.reset() - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### - -class StreamReader(Codec): - - def __init__(self, stream, errors='strict'): - - """ Creates a StreamReader instance. - - stream must be a file-like object open for reading - (binary) data. - - The StreamReader may use different error handling - schemes by providing the errors keyword argument. 
These - parameters are predefined: - - 'strict' - raise a ValueError (or a subclass) - 'ignore' - ignore the character and continue with the next - 'replace'- replace with a suitable replacement character; - - The set of allowed parameter values can be extended via - register_error. - """ - self.stream = stream - self.errors = errors - self.bytebuffer = "" - # For str->str decoding this will stay a str - # For str->unicode decoding the first read will promote it to unicode - self.charbuffer = "" - self.linebuffer = None - - def decode(self, input, errors='strict'): - raise NotImplementedError - - def read(self, size=-1, chars=-1, firstline=False): - - """ Decodes data from the stream self.stream and returns the - resulting object. - - chars indicates the number of characters to read from the - stream. read() will never return more than chars - characters, but it might return less, if there are not enough - characters available. - - size indicates the approximate maximum number of bytes to - read from the stream for decoding purposes. The decoder - can modify this setting as appropriate. The default value - -1 indicates to read and decode as much as possible. size - is intended to prevent having to decode huge files in one - step. - - If firstline is true, and a UnicodeDecodeError happens - after the first line terminator in the input only the first line - will be returned, the rest of the input will be kept until the - next call to read(). - - The method should use a greedy read strategy meaning that - it should read as much data as is allowed within the - definition of the encoding and the given size, e.g. if - optional encoding endings or state markers are available - on the stream, these should be read too. 
- """ - # If we have lines cached, first merge them back into characters - if self.linebuffer: - self.charbuffer = "".join(self.linebuffer) - self.linebuffer = None - - # read until we get the required number of characters (if available) - while True: - # can the request be satisfied from the character buffer? - if chars >= 0: - if len(self.charbuffer) >= chars: - break - elif size >= 0: - if len(self.charbuffer) >= size: - break - # we need more data - if size < 0: - newdata = self.stream.read() - else: - newdata = self.stream.read(size) - # decode bytes (those remaining from the last call included) - data = self.bytebuffer + newdata - try: - newchars, decodedbytes = self.decode(data, self.errors) - except UnicodeDecodeError, exc: - if firstline: - newchars, decodedbytes = self.decode(data[:exc.start], self.errors) - lines = newchars.splitlines(True) - if len(lines)<=1: - raise - else: - raise - # keep undecoded bytes until the next call - self.bytebuffer = data[decodedbytes:] - # put new characters in the character buffer - self.charbuffer += newchars - # there was no data available - if not newdata: - break - if chars < 0: - # Return everything we've got - result = self.charbuffer - self.charbuffer = "" - else: - # Return the first chars characters - result = self.charbuffer[:chars] - self.charbuffer = self.charbuffer[chars:] - return result - - def readline(self, size=None, keepends=True): - - """ Read one line from the input stream and return the - decoded data. - - size, if given, is passed as size argument to the - read() method. 
- - """ - # If we have lines cached from an earlier read, return - # them unconditionally - if self.linebuffer: - line = self.linebuffer[0] - del self.linebuffer[0] - if len(self.linebuffer) == 1: - # revert to charbuffer mode; we might need more data - # next time - self.charbuffer = self.linebuffer[0] - self.linebuffer = None - if not keepends: - line = line.splitlines(False)[0] - return line - - readsize = size or 72 - line = "" - # If size is given, we call read() only once - while True: - data = self.read(readsize, firstline=True) - if data: - # If we're at a "\r" read one extra character (which might - # be a "\n") to get a proper line ending. If the stream is - # temporarily exhausted we return the wrong line ending. - if data.endswith("\r"): - data += self.read(size=1, chars=1) - - line += data - lines = line.splitlines(True) - if lines: - if len(lines) > 1: - # More than one line result; the first line is a full line - # to return - line = lines[0] - del lines[0] - if len(lines) > 1: - # cache the remaining lines - lines[-1] += self.charbuffer - self.linebuffer = lines - self.charbuffer = None - else: - # only one remaining line, put it back into charbuffer - self.charbuffer = lines[0] + self.charbuffer - if not keepends: - line = line.splitlines(False)[0] - break - line0withend = lines[0] - line0withoutend = lines[0].splitlines(False)[0] - if line0withend != line0withoutend: # We really have a line end - # Put the rest back together and keep it until the next call - self.charbuffer = "".join(lines[1:]) + self.charbuffer - if keepends: - line = line0withend - else: - line = line0withoutend - break - # we didn't get anything or this was our only try - if not data or size is not None: - if line and not keepends: - line = line.splitlines(False)[0] - break - if readsize<8000: - readsize *= 2 - return line - - def readlines(self, sizehint=None, keepends=True): - - """ Read all lines available on the input stream - and return them as list of lines. 
- - Line breaks are implemented using the codec's decoder - method and are included in the list entries. - - sizehint, if given, is ignored since there is no efficient - way to finding the true end-of-line. - - """ - data = self.read() - return data.splitlines(keepends) - - def reset(self): - - """ Resets the codec buffers used for keeping state. - - Note that no stream repositioning should take place. - This method is primarily intended to be able to recover - from decoding errors. - - """ - self.bytebuffer = "" - self.charbuffer = u"" - self.linebuffer = None - - def seek(self, offset, whence=0): - """ Set the input stream's current position. - - Resets the codec buffers used for keeping state. - """ - self.stream.seek(offset, whence) - self.reset() - - def next(self): - - """ Return the next decoded line from the input stream.""" - line = self.readline() - if line: - return line - raise StopIteration - - def __iter__(self): - return self - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### - -class StreamReaderWriter: - - """ StreamReaderWriter instances allow wrapping streams which - work in both read and write modes. - - The design is such that one can use the factory functions - returned by the codec.lookup() function to construct the - instance. - - """ - # Optional attributes set by the file wrappers below - encoding = 'unknown' - - def __init__(self, stream, Reader, Writer, errors='strict'): - - """ Creates a StreamReaderWriter instance. - - stream must be a Stream-like object. - - Reader, Writer must be factory functions or classes - providing the StreamReader, StreamWriter interface resp. - - Error handling is done in the same way as defined for the - StreamWriter/Readers. 
- - """ - self.stream = stream - self.reader = Reader(stream, errors) - self.writer = Writer(stream, errors) - self.errors = errors - - def read(self, size=-1): - - return self.reader.read(size) - - def readline(self, size=None): - - return self.reader.readline(size) - - def readlines(self, sizehint=None): - - return self.reader.readlines(sizehint) - - def next(self): - - """ Return the next decoded line from the input stream.""" - return self.reader.next() - - def __iter__(self): - return self - - def write(self, data): - - return self.writer.write(data) - - def writelines(self, list): - - return self.writer.writelines(list) - - def reset(self): - - self.reader.reset() - self.writer.reset() - - def seek(self, offset, whence=0): - self.stream.seek(offset, whence) - self.reader.reset() - if whence == 0 and offset == 0: - self.writer.reset() - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - # these are needed to make "with codecs.open(...)" work properly - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### - -class StreamRecoder: - - """ StreamRecoder instances provide a frontend - backend - view of encoding data. - - They use the complete set of APIs returned by the - codecs.lookup() function to implement their task. - - Data written to the stream is first decoded into an - intermediate format (which is dependent on the given codec - combination) and then written to the stream using an instance - of the provided Writer class. - - In the other direction, data is read from the stream using a - Reader instance and then return encoded data to the caller. 
- - """ - # Optional attributes set by the file wrappers below - data_encoding = 'unknown' - file_encoding = 'unknown' - - def __init__(self, stream, encode, decode, Reader, Writer, - errors='strict'): - - """ Creates a StreamRecoder instance which implements a two-way - conversion: encode and decode work on the frontend (the - input to .read() and output of .write()) while - Reader and Writer work on the backend (reading and - writing to the stream). - - You can use these objects to do transparent direct - recodings from e.g. latin-1 to utf-8 and back. - - stream must be a file-like object. - - encode, decode must adhere to the Codec interface, Reader, - Writer must be factory functions or classes providing the - StreamReader, StreamWriter interface resp. - - encode and decode are needed for the frontend translation, - Reader and Writer for the backend translation. Unicode is - used as intermediate encoding. - - Error handling is done in the same way as defined for the - StreamWriter/Readers. 
- - """ - self.stream = stream - self.encode = encode - self.decode = decode - self.reader = Reader(stream, errors) - self.writer = Writer(stream, errors) - self.errors = errors - - def read(self, size=-1): - - data = self.reader.read(size) - data, bytesencoded = self.encode(data, self.errors) - return data - - def readline(self, size=None): - - if size is None: - data = self.reader.readline() - else: - data = self.reader.readline(size) - data, bytesencoded = self.encode(data, self.errors) - return data - - def readlines(self, sizehint=None): - - data = self.reader.read() - data, bytesencoded = self.encode(data, self.errors) - return data.splitlines(1) - - def next(self): - - """ Return the next decoded line from the input stream.""" - data = self.reader.next() - data, bytesencoded = self.encode(data, self.errors) - return data - - def __iter__(self): - return self - - def write(self, data): - - data, bytesdecoded = self.decode(data, self.errors) - return self.writer.write(data) - - def writelines(self, list): - - data = ''.join(list) - data, bytesdecoded = self.decode(data, self.errors) - return self.writer.write(data) - - def reset(self): - - self.reader.reset() - self.writer.reset() - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### Shortcuts - -def open(filename, mode='rb', encoding=None, errors='strict', buffering=1): - - """ Open an encoded file using the given mode and return - a wrapped version providing transparent encoding/decoding. - - Note: The wrapped version will only accept the object format - defined by the codecs, i.e. Unicode objects for most builtin - codecs. Output is also codec dependent and will usually be - Unicode as well. - - Files are always opened in binary mode, even if no binary mode - was specified. 
This is done to avoid data loss due to encodings - using 8-bit values. The default file mode is 'rb' meaning to - open the file in binary read mode. - - encoding specifies the encoding which is to be used for the - file. - - errors may be given to define the error handling. It defaults - to 'strict' which causes ValueErrors to be raised in case an - encoding error occurs. - - buffering has the same meaning as for the builtin open() API. - It defaults to line buffered. - - The returned wrapped file object provides an extra attribute - .encoding which allows querying the used encoding. This - attribute is only available if an encoding was specified as - parameter. - - """ - if encoding is not None: - if 'U' in mode: - # No automatic conversion of '\n' is done on reading and writing - mode = mode.strip().replace('U', '') - if mode[:1] not in set('rwa'): - mode = 'r' + mode - if 'b' not in mode: - # Force opening of the file in binary mode - mode = mode + 'b' - file = __builtin__.open(filename, mode, buffering) - if encoding is None: - return file - info = lookup(encoding) - srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors) - # Add attributes to simplify introspection - srw.encoding = encoding - return srw - -def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): - - """ Return a wrapped version of file which provides transparent - encoding translation. - - Strings written to the wrapped file are interpreted according - to the given data_encoding and then written to the original - file as string using file_encoding. The intermediate encoding - will usually be Unicode but depends on the specified codecs. - - Strings are read from the file using file_encoding and then - passed back to the caller as string using data_encoding. - - If file_encoding is not given, it defaults to data_encoding. - - errors may be given to define the error handling. 
It defaults - to 'strict' which causes ValueErrors to be raised in case an - encoding error occurs. - - The returned wrapped file object provides two extra attributes - .data_encoding and .file_encoding which reflect the given - parameters of the same name. The attributes can be used for - introspection by Python programs. - - """ - if file_encoding is None: - file_encoding = data_encoding - data_info = lookup(data_encoding) - file_info = lookup(file_encoding) - sr = StreamRecoder(file, data_info.encode, data_info.decode, - file_info.streamreader, file_info.streamwriter, errors) - # Add attributes to simplify introspection - sr.data_encoding = data_encoding - sr.file_encoding = file_encoding - return sr - -### Helpers for codec lookup - -def getencoder(encoding): - - """ Lookup up the codec for the given encoding and return - its encoder function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).encode - -def getdecoder(encoding): - - """ Lookup up the codec for the given encoding and return - its decoder function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).decode - -def getincrementalencoder(encoding): - - """ Lookup up the codec for the given encoding and return - its IncrementalEncoder class or factory function. - - Raises a LookupError in case the encoding cannot be found - or the codecs doesn't provide an incremental encoder. - - """ - encoder = lookup(encoding).incrementalencoder - if encoder is None: - raise LookupError(encoding) - return encoder - -def getincrementaldecoder(encoding): - - """ Lookup up the codec for the given encoding and return - its IncrementalDecoder class or factory function. - - Raises a LookupError in case the encoding cannot be found - or the codecs doesn't provide an incremental decoder. 
- - """ - decoder = lookup(encoding).incrementaldecoder - if decoder is None: - raise LookupError(encoding) - return decoder - -def getreader(encoding): - - """ Lookup up the codec for the given encoding and return - its StreamReader class or factory function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).streamreader - -def getwriter(encoding): - - """ Lookup up the codec for the given encoding and return - its StreamWriter class or factory function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).streamwriter - -def iterencode(iterator, encoding, errors='strict', **kwargs): - """ - Encoding iterator. - - Encodes the input strings from the iterator using an IncrementalEncoder. - - errors and kwargs are passed through to the IncrementalEncoder - constructor. - """ - encoder = getincrementalencoder(encoding)(errors, **kwargs) - for input in iterator: - output = encoder.encode(input) - if output: - yield output - output = encoder.encode("", True) - if output: - yield output - -def iterdecode(iterator, encoding, errors='strict', **kwargs): - """ - Decoding iterator. - - Decodes the input strings from the iterator using an IncrementalDecoder. - - errors and kwargs are passed through to the IncrementalDecoder - constructor. - """ - decoder = getincrementaldecoder(encoding)(errors, **kwargs) - for input in iterator: - output = decoder.decode(input) - if output: - yield output - output = decoder.decode("", True) - if output: - yield output - -### Helpers for charmap-based codecs - -def make_identity_dict(rng): - - """ make_identity_dict(rng) -> dict - - Return a dictionary where elements of the rng sequence are - mapped to themselves. - - """ - res = {} - for i in rng: - res[i]=i - return res - -def make_encoding_map(decoding_map): - - """ Creates an encoding map from a decoding map. 
- - If a target mapping in the decoding map occurs multiple - times, then that target is mapped to None (undefined mapping), - causing an exception when encountered by the charmap codec - during translation. - - One example where this happens is cp875.py which decodes - multiple character to \\u001a. - - """ - m = {} - for k,v in decoding_map.items(): - if not v in m: - m[v] = k - else: - m[v] = None - return m - -### error handlers - -try: - strict_errors = lookup_error("strict") - ignore_errors = lookup_error("ignore") - replace_errors = lookup_error("replace") - xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") - backslashreplace_errors = lookup_error("backslashreplace") -except LookupError: - # In --disable-unicode builds, these error handler are missing - strict_errors = None - ignore_errors = None - replace_errors = None - xmlcharrefreplace_errors = None - backslashreplace_errors = None - -# Tell modulefinder that using codecs probably needs the encodings -# package -_false = 0 -if _false: - import encodings - -### Tests - -if __name__ == '__main__': - - # Make stdout translate Latin-1 output into UTF-8 output - sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8') - - # Have stdin translate Latin-1 input into UTF-8 input - sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1') diff --git a/python/Lib/codeop.py b/python/Lib/codeop.py deleted file mode 100755 index 5616d92a85..0000000000 --- a/python/Lib/codeop.py +++ /dev/null @@ -1,168 +0,0 @@ -r"""Utilities to compile possibly incomplete Python source code. - -This module provides two interfaces, broadly similar to the builtin -function compile(), which take program text, a filename and a 'mode' -and: - -- Return code object if the command is complete and valid -- Return None if the command is incomplete -- Raise SyntaxError, ValueError or OverflowError if the command is a - syntax error (OverflowError and ValueError can be produced by - malformed literals). 
- -Approach: - -First, check if the source consists entirely of blank lines and -comments; if so, replace it with 'pass', because the built-in -parser doesn't always do the right thing for these. - -Compile three times: as is, with \n, and with \n\n appended. If it -compiles as is, it's complete. If it compiles with one \n appended, -we expect more. If it doesn't compile either way, we compare the -error we get when compiling with \n or \n\n appended. If the errors -are the same, the code is broken. But if the errors are different, we -expect more. Not intuitive; not even guaranteed to hold in future -releases; but this matches the compiler's behavior from Python 1.4 -through 2.2, at least. - -Caveat: - -It is possible (but not likely) that the parser stops parsing with a -successful outcome before reaching the end of the source; in this -case, trailing symbols may be ignored instead of causing an error. -For example, a backslash followed by two newlines may be followed by -arbitrary garbage. This will be fixed once the API for the parser is -better. - -The two interfaces are: - -compile_command(source, filename, symbol): - - Compiles a single command in the manner described above. - -CommandCompiler(): - - Instances of this class have __call__ methods identical in - signature to compile_command; the difference is that if the - instance compiles program text containing a __future__ statement, - the instance 'remembers' and compiles all subsequent program texts - with the statement in force. - -The module also provides another class: - -Compile(): - - Instances of this class act like the built-in function compile, - but with 'memory' in the sense described above. 
-""" - -import __future__ - -_features = [getattr(__future__, fname) - for fname in __future__.all_feature_names] - -__all__ = ["compile_command", "Compile", "CommandCompiler"] - -PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h - -def _maybe_compile(compiler, source, filename, symbol): - # Check for source consisting of only blank lines and comments - for line in source.split("\n"): - line = line.strip() - if line and line[0] != '#': - break # Leave it alone - else: - if symbol != "eval": - source = "pass" # Replace it with a 'pass' statement - - err = err1 = err2 = None - code = code1 = code2 = None - - try: - code = compiler(source, filename, symbol) - except SyntaxError, err: - pass - - try: - code1 = compiler(source + "\n", filename, symbol) - except SyntaxError, err1: - pass - - try: - code2 = compiler(source + "\n\n", filename, symbol) - except SyntaxError, err2: - pass - - if code: - return code - if not code1 and repr(err1) == repr(err2): - raise SyntaxError, err1 - -def _compile(source, filename, symbol): - return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT) - -def compile_command(source, filename="", symbol="single"): - r"""Compile a command and determine whether it is incomplete. - - Arguments: - - source -- the source string; may contain \n characters - filename -- optional filename from which source was read; default - "" - symbol -- optional grammar start symbol; "single" (default) or "eval" - - Return value / exceptions raised: - - - Return a code object if the command is complete and valid - - Return None if the command is incomplete - - Raise SyntaxError, ValueError or OverflowError if the command is a - syntax error (OverflowError and ValueError can be produced by - malformed literals). 
- """ - return _maybe_compile(_compile, source, filename, symbol) - -class Compile: - """Instances of this class behave much like the built-in compile - function, but if one is used to compile text containing a future - statement, it "remembers" and compiles all subsequent program texts - with the statement in force.""" - def __init__(self): - self.flags = PyCF_DONT_IMPLY_DEDENT - - def __call__(self, source, filename, symbol): - codeob = compile(source, filename, symbol, self.flags, 1) - for feature in _features: - if codeob.co_flags & feature.compiler_flag: - self.flags |= feature.compiler_flag - return codeob - -class CommandCompiler: - """Instances of this class have __call__ methods identical in - signature to compile_command; the difference is that if the - instance compiles program text containing a __future__ statement, - the instance 'remembers' and compiles all subsequent program texts - with the statement in force.""" - - def __init__(self,): - self.compiler = Compile() - - def __call__(self, source, filename="", symbol="single"): - r"""Compile a command and determine whether it is incomplete. - - Arguments: - - source -- the source string; may contain \n characters - filename -- optional filename from which source was read; - default "" - symbol -- optional grammar start symbol; "single" (default) or - "eval" - - Return value / exceptions raised: - - - Return a code object if the command is complete and valid - - Return None if the command is incomplete - - Raise SyntaxError, ValueError or OverflowError if the command is a - syntax error (OverflowError and ValueError can be produced by - malformed literals). 
- """ - return _maybe_compile(self.compiler, source, filename, symbol) diff --git a/python/Lib/collections.py b/python/Lib/collections.py deleted file mode 100755 index f2ad9726d5..0000000000 --- a/python/Lib/collections.py +++ /dev/null @@ -1,742 +0,0 @@ -'''This module implements specialized container datatypes providing -alternatives to Python's general purpose built-in containers, dict, -list, set, and tuple. - -* namedtuple factory function for creating tuple subclasses with named fields -* deque list-like container with fast appends and pops on either end -* Counter dict subclass for counting hashable objects -* OrderedDict dict subclass that remembers the order entries were added -* defaultdict dict subclass that calls a factory function to supply missing values - -''' - -__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict'] -# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py. -# They should however be considered an integral part of collections.py. -from _abcoll import * -import _abcoll -__all__ += _abcoll.__all__ - -from _collections import deque, defaultdict -from operator import itemgetter as _itemgetter, eq as _eq -from keyword import iskeyword as _iskeyword -import sys as _sys -import heapq as _heapq -from itertools import repeat as _repeat, chain as _chain, starmap as _starmap -from itertools import imap as _imap - -try: - from thread import get_ident as _get_ident -except ImportError: - from dummy_thread import get_ident as _get_ident - - -################################################################################ -### OrderedDict -################################################################################ - -class OrderedDict(dict): - 'Dictionary that remembers insertion order' - # An inherited dict maps keys to values. - # The inherited dict provides __getitem__, __len__, __contains__, and get. - # The remaining methods are order-aware. 
- # Big-O running times for all methods are the same as regular dictionaries. - - # The internal self.__map dict maps keys to links in a doubly linked list. - # The circular doubly linked list starts and ends with a sentinel element. - # The sentinel element never gets deleted (this simplifies the algorithm). - # Each link is stored as a list of length three: [PREV, NEXT, KEY]. - - def __init__(*args, **kwds): - '''Initialize an ordered dictionary. The signature is the same as - regular dictionaries, but keyword arguments are not recommended because - their insertion order is arbitrary. - - ''' - if not args: - raise TypeError("descriptor '__init__' of 'OrderedDict' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__root - except AttributeError: - self.__root = root = [] # sentinel node - root[:] = [root, root, None] - self.__map = {} - self.__update(*args, **kwds) - - def __setitem__(self, key, value, dict_setitem=dict.__setitem__): - 'od.__setitem__(i, y) <==> od[i]=y' - # Setting a new item creates a new link at the end of the linked list, - # and the inherited dictionary is updated with the new key/value pair. - if key not in self: - root = self.__root - last = root[0] - last[1] = root[0] = self.__map[key] = [last, root, key] - return dict_setitem(self, key, value) - - def __delitem__(self, key, dict_delitem=dict.__delitem__): - 'od.__delitem__(y) <==> del od[y]' - # Deleting an existing item uses self.__map to find the link which gets - # removed by updating the links in the predecessor and successor nodes. - dict_delitem(self, key) - link_prev, link_next, _ = self.__map.pop(key) - link_prev[1] = link_next # update link_prev[NEXT] - link_next[0] = link_prev # update link_next[PREV] - - def __iter__(self): - 'od.__iter__() <==> iter(od)' - # Traverse the linked list in order. 
- root = self.__root - curr = root[1] # start at the first node - while curr is not root: - yield curr[2] # yield the curr[KEY] - curr = curr[1] # move to next node - - def __reversed__(self): - 'od.__reversed__() <==> reversed(od)' - # Traverse the linked list in reverse order. - root = self.__root - curr = root[0] # start at the last node - while curr is not root: - yield curr[2] # yield the curr[KEY] - curr = curr[0] # move to previous node - - def clear(self): - 'od.clear() -> None. Remove all items from od.' - root = self.__root - root[:] = [root, root, None] - self.__map.clear() - dict.clear(self) - - # -- the following methods do not depend on the internal structure -- - - def keys(self): - 'od.keys() -> list of keys in od' - return list(self) - - def values(self): - 'od.values() -> list of values in od' - return [self[key] for key in self] - - def items(self): - 'od.items() -> list of (key, value) pairs in od' - return [(key, self[key]) for key in self] - - def iterkeys(self): - 'od.iterkeys() -> an iterator over the keys in od' - return iter(self) - - def itervalues(self): - 'od.itervalues -> an iterator over the values in od' - for k in self: - yield self[k] - - def iteritems(self): - 'od.iteritems -> an iterator over the (key, value) pairs in od' - for k in self: - yield (k, self[k]) - - update = MutableMapping.update - - __update = update # let subclasses override update without breaking __init__ - - __marker = object() - - def pop(self, key, default=__marker): - '''od.pop(k[,d]) -> v, remove specified key and return the corresponding - value. If key is not found, d is returned if given, otherwise KeyError - is raised. 
- - ''' - if key in self: - result = self[key] - del self[key] - return result - if default is self.__marker: - raise KeyError(key) - return default - - def setdefault(self, key, default=None): - 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' - if key in self: - return self[key] - self[key] = default - return default - - def popitem(self, last=True): - '''od.popitem() -> (k, v), return and remove a (key, value) pair. - Pairs are returned in LIFO order if last is true or FIFO order if false. - - ''' - if not self: - raise KeyError('dictionary is empty') - key = next(reversed(self) if last else iter(self)) - value = self.pop(key) - return key, value - - def __repr__(self, _repr_running={}): - 'od.__repr__() <==> repr(od)' - call_key = id(self), _get_ident() - if call_key in _repr_running: - return '...' - _repr_running[call_key] = 1 - try: - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) - finally: - del _repr_running[call_key] - - def __reduce__(self): - 'Return state information for pickling' - items = [[k, self[k]] for k in self] - inst_dict = vars(self).copy() - for k in vars(OrderedDict()): - inst_dict.pop(k, None) - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def copy(self): - 'od.copy() -> a shallow copy of od' - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. - If not specified, the value defaults to None. - - ''' - self = cls() - for key in iterable: - self[key] = value - return self - - def __eq__(self, other): - '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive - while comparison to a regular mapping is order-insensitive. 
- - ''' - if isinstance(other, OrderedDict): - return dict.__eq__(self, other) and all(_imap(_eq, self, other)) - return dict.__eq__(self, other) - - def __ne__(self, other): - 'od.__ne__(y) <==> od!=y' - return not self == other - - # -- the following methods support python 3.x style dictionary views -- - - def viewkeys(self): - "od.viewkeys() -> a set-like object providing a view on od's keys" - return KeysView(self) - - def viewvalues(self): - "od.viewvalues() -> an object providing a view on od's values" - return ValuesView(self) - - def viewitems(self): - "od.viewitems() -> a set-like object providing a view on od's items" - return ItemsView(self) - - -################################################################################ -### namedtuple -################################################################################ - -_class_template = '''\ -class {typename}(tuple): - '{typename}({arg_list})' - - __slots__ = () - - _fields = {field_names!r} - - def __new__(_cls, {arg_list}): - 'Create new instance of {typename}({arg_list})' - return _tuple.__new__(_cls, ({arg_list})) - - @classmethod - def _make(cls, iterable, new=tuple.__new__, len=len): - 'Make a new {typename} object from a sequence or iterable' - result = new(cls, iterable) - if len(result) != {num_fields:d}: - raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result)) - return result - - def __repr__(self): - 'Return a nicely formatted representation string' - return '{typename}({repr_fmt})' % self - - def _asdict(self): - 'Return a new OrderedDict which maps field names to their values' - return OrderedDict(zip(self._fields, self)) - - def _replace(_self, **kwds): - 'Return a new {typename} object replacing specified fields with new values' - result = _self._make(map(kwds.pop, {field_names!r}, _self)) - if kwds: - raise ValueError('Got unexpected field names: %r' % kwds.keys()) - return result - - def __getnewargs__(self): - 'Return self as a plain tuple. 
Used by copy and pickle.' - return tuple(self) - - __dict__ = _property(_asdict) - - def __getstate__(self): - 'Exclude the OrderedDict from pickling' - pass - -{field_defs} -''' - -_repr_template = '{name}=%r' - -_field_template = '''\ - {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}') -''' - -def namedtuple(typename, field_names, verbose=False, rename=False): - """Returns a new subclass of tuple with named fields. - - >>> Point = namedtuple('Point', ['x', 'y']) - >>> Point.__doc__ # docstring for the new class - 'Point(x, y)' - >>> p = Point(11, y=22) # instantiate with positional args or keywords - >>> p[0] + p[1] # indexable like a plain tuple - 33 - >>> x, y = p # unpack like a regular tuple - >>> x, y - (11, 22) - >>> p.x + p.y # fields also accessible by name - 33 - >>> d = p._asdict() # convert to a dictionary - >>> d['x'] - 11 - >>> Point(**d) # convert from a dictionary - Point(x=11, y=22) - >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields - Point(x=100, y=22) - - """ - - # Validate the field names. At the user's option, either generate an error - # message or automatically replace the field name with a valid name. 
- if isinstance(field_names, basestring): - field_names = field_names.replace(',', ' ').split() - field_names = map(str, field_names) - typename = str(typename) - if rename: - seen = set() - for index, name in enumerate(field_names): - if (not all(c.isalnum() or c=='_' for c in name) - or _iskeyword(name) - or not name - or name[0].isdigit() - or name.startswith('_') - or name in seen): - field_names[index] = '_%d' % index - seen.add(name) - for name in [typename] + field_names: - if type(name) != str: - raise TypeError('Type names and field names must be strings') - if not all(c.isalnum() or c=='_' for c in name): - raise ValueError('Type names and field names can only contain ' - 'alphanumeric characters and underscores: %r' % name) - if _iskeyword(name): - raise ValueError('Type names and field names cannot be a ' - 'keyword: %r' % name) - if name[0].isdigit(): - raise ValueError('Type names and field names cannot start with ' - 'a number: %r' % name) - seen = set() - for name in field_names: - if name.startswith('_') and not rename: - raise ValueError('Field names cannot start with an underscore: ' - '%r' % name) - if name in seen: - raise ValueError('Encountered duplicate field name: %r' % name) - seen.add(name) - - # Fill-in the class template - class_definition = _class_template.format( - typename = typename, - field_names = tuple(field_names), - num_fields = len(field_names), - arg_list = repr(tuple(field_names)).replace("'", "")[1:-1], - repr_fmt = ', '.join(_repr_template.format(name=name) - for name in field_names), - field_defs = '\n'.join(_field_template.format(index=index, name=name) - for index, name in enumerate(field_names)) - ) - if verbose: - print class_definition - - # Execute the template string in a temporary namespace and support - # tracing utilities by setting a value for frame.f_globals['__name__'] - namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename, - OrderedDict=OrderedDict, _property=property, _tuple=tuple) 
- try: - exec class_definition in namespace - except SyntaxError as e: - raise SyntaxError(e.message + ':\n' + class_definition) - result = namespace[typename] - - # For pickling to work, the __module__ variable needs to be set to the frame - # where the named tuple is created. Bypass this step in environments where - # sys._getframe is not defined (Jython for example) or sys._getframe is not - # defined for arguments greater than 0 (IronPython). - try: - result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__') - except (AttributeError, ValueError): - pass - - return result - - -######################################################################## -### Counter -######################################################################## - -class Counter(dict): - '''Dict subclass for counting hashable items. Sometimes called a bag - or multiset. Elements are stored as dictionary keys and their counts - are stored as dictionary values. - - >>> c = Counter('abcdeabcdabcaba') # count elements from a string - - >>> c.most_common(3) # three most common elements - [('a', 5), ('b', 4), ('c', 3)] - >>> sorted(c) # list all unique elements - ['a', 'b', 'c', 'd', 'e'] - >>> ''.join(sorted(c.elements())) # list elements with repetitions - 'aaaaabbbbcccdde' - >>> sum(c.values()) # total of all counts - 15 - - >>> c['a'] # count of letter 'a' - 5 - >>> for elem in 'shazam': # update counts from an iterable - ... 
c[elem] += 1 # by adding 1 to each element's count - >>> c['a'] # now there are seven 'a' - 7 - >>> del c['b'] # remove all 'b' - >>> c['b'] # now there are zero 'b' - 0 - - >>> d = Counter('simsalabim') # make another counter - >>> c.update(d) # add in the second counter - >>> c['a'] # now there are nine 'a' - 9 - - >>> c.clear() # empty the counter - >>> c - Counter() - - Note: If a count is set to zero or reduced to zero, it will remain - in the counter until the entry is deleted or the counter is cleared: - - >>> c = Counter('aaabbc') - >>> c['b'] -= 2 # reduce the count of 'b' by two - >>> c.most_common() # 'b' is still in, but its count is zero - [('a', 3), ('c', 1), ('b', 0)] - - ''' - # References: - # http://en.wikipedia.org/wiki/Multiset - # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html - # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm - # http://code.activestate.com/recipes/259174/ - # Knuth, TAOCP Vol. II section 4.6.3 - - def __init__(*args, **kwds): - '''Create a new, empty Counter object. And if given, count elements - from an input iterable. Or, initialize the count from another mapping - of elements to their counts. - - >>> c = Counter() # a new, empty counter - >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping - >>> c = Counter(a=4, b=2) # a new counter from keyword args - - ''' - if not args: - raise TypeError("descriptor '__init__' of 'Counter' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - super(Counter, self).__init__() - self.update(*args, **kwds) - - def __missing__(self, key): - 'The count of elements not in the Counter is zero.' 
- # Needed so that self[missing_item] does not raise KeyError - return 0 - - def most_common(self, n=None): - '''List the n most common elements and their counts from the most - common to the least. If n is None, then list all element counts. - - >>> Counter('abcdeabcdabcaba').most_common(3) - [('a', 5), ('b', 4), ('c', 3)] - - ''' - # Emulate Bag.sortedByCount from Smalltalk - if n is None: - return sorted(self.iteritems(), key=_itemgetter(1), reverse=True) - return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1)) - - def elements(self): - '''Iterator over elements repeating each as many times as its count. - - >>> c = Counter('ABCABC') - >>> sorted(c.elements()) - ['A', 'A', 'B', 'B', 'C', 'C'] - - # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 - >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) - >>> product = 1 - >>> for factor in prime_factors.elements(): # loop over factors - ... product *= factor # and multiply them - >>> product - 1836 - - Note, if an element's count has been set to zero or is a negative - number, elements() will ignore it. - - ''' - # Emulate Bag.do from Smalltalk and Multiset.begin from C++. - return _chain.from_iterable(_starmap(_repeat, self.iteritems())) - - # Override dict methods where necessary - - @classmethod - def fromkeys(cls, iterable, v=None): - # There is no equivalent method for counters because setting v=1 - # means that no element can have a count greater than one. - raise NotImplementedError( - 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - - def update(*args, **kwds): - '''Like dict.update() but add counts instead of replacing them. - - Source can be an iterable, a dictionary, or another Counter instance. 
- - >>> c = Counter('which') - >>> c.update('witch') # add elements from another iterable - >>> d = Counter('watch') - >>> c.update(d) # add elements from another counter - >>> c['h'] # four 'h' in which, witch, and watch - 4 - - ''' - # The regular dict.update() operation makes no sense here because the - # replace behavior results in the some of original untouched counts - # being mixed-in with all of the other counts for a mismash that - # doesn't have a straight-forward interpretation in most counting - # contexts. Instead, we implement straight-addition. Both the inputs - # and outputs are allowed to contain zero and negative counts. - - if not args: - raise TypeError("descriptor 'update' of 'Counter' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - iterable = args[0] if args else None - if iterable is not None: - if isinstance(iterable, Mapping): - if self: - self_get = self.get - for elem, count in iterable.iteritems(): - self[elem] = self_get(elem, 0) + count - else: - super(Counter, self).update(iterable) # fast path when counter is empty - else: - self_get = self.get - for elem in iterable: - self[elem] = self_get(elem, 0) + 1 - if kwds: - self.update(kwds) - - def subtract(*args, **kwds): - '''Like dict.update() but subtracts counts instead of replacing them. - Counts can be reduced below zero. Both the inputs and outputs are - allowed to contain zero and negative counts. - - Source can be an iterable, a dictionary, or another Counter instance. 
- - >>> c = Counter('which') - >>> c.subtract('witch') # subtract elements from another iterable - >>> c.subtract(Counter('watch')) # subtract elements from another counter - >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch - 0 - >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch - -1 - - ''' - if not args: - raise TypeError("descriptor 'subtract' of 'Counter' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - iterable = args[0] if args else None - if iterable is not None: - self_get = self.get - if isinstance(iterable, Mapping): - for elem, count in iterable.items(): - self[elem] = self_get(elem, 0) - count - else: - for elem in iterable: - self[elem] = self_get(elem, 0) - 1 - if kwds: - self.subtract(kwds) - - def copy(self): - 'Return a shallow copy.' - return self.__class__(self) - - def __reduce__(self): - return self.__class__, (dict(self),) - - def __delitem__(self, elem): - 'Like dict.__delitem__() but does not raise KeyError for missing values.' - if elem in self: - super(Counter, self).__delitem__(elem) - - def __repr__(self): - if not self: - return '%s()' % self.__class__.__name__ - items = ', '.join(map('%r: %r'.__mod__, self.most_common())) - return '%s({%s})' % (self.__class__.__name__, items) - - # Multiset-style mathematical operations discussed in: - # Knuth TAOCP Volume II section 4.6.3 exercise 19 - # and at http://en.wikipedia.org/wiki/Multiset - # - # Outputs guaranteed to only include positive counts. - # - # To strip negative and zero counts, add-in an empty counter: - # c += Counter() - - def __add__(self, other): - '''Add counts from two counters. 
- - >>> Counter('abbb') + Counter('bcc') - Counter({'b': 4, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - newcount = count + other[elem] - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count > 0: - result[elem] = count - return result - - def __sub__(self, other): - ''' Subtract count, but keep only results with positive counts. - - >>> Counter('abbbc') - Counter('bccd') - Counter({'b': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - newcount = count - other[elem] - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count < 0: - result[elem] = 0 - count - return result - - def __or__(self, other): - '''Union is the maximum of value in either of the input counters. - - >>> Counter('abbb') | Counter('bcc') - Counter({'b': 3, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - other_count = other[elem] - newcount = other_count if count < other_count else count - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count > 0: - result[elem] = count - return result - - def __and__(self, other): - ''' Intersection is the minimum of corresponding counts. 
- - >>> Counter('abbb') & Counter('bcc') - Counter({'b': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - other_count = other[elem] - newcount = count if count < other_count else other_count - if newcount > 0: - result[elem] = newcount - return result - - -if __name__ == '__main__': - # verify that instances can be pickled - from cPickle import loads, dumps - Point = namedtuple('Point', 'x, y', True) - p = Point(x=10, y=20) - assert p == loads(dumps(p)) - - # test and demonstrate ability to override methods - class Point(namedtuple('Point', 'x y')): - __slots__ = () - @property - def hypot(self): - return (self.x ** 2 + self.y ** 2) ** 0.5 - def __str__(self): - return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot) - - for p in Point(3, 4), Point(14, 5/7.): - print p - - class Point(namedtuple('Point', 'x y')): - 'Point class with optimized _make() and _replace() without error-checking' - __slots__ = () - _make = classmethod(tuple.__new__) - def _replace(self, _map=map, **kwds): - return self._make(_map(kwds.get, ('x', 'y'), self)) - - print Point(11, 22)._replace(x=100) - - Point3D = namedtuple('Point3D', Point._fields + ('z',)) - print Point3D.__doc__ - - import doctest - TestResults = namedtuple('TestResults', 'failed attempted') - print TestResults(*doctest.testmod()) diff --git a/python/Lib/colorsys.py b/python/Lib/colorsys.py deleted file mode 100755 index a6c0cf6a46..0000000000 --- a/python/Lib/colorsys.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Conversion functions between RGB and other color systems. - -This modules provides two functions for each color system ABC: - - rgb_to_abc(r, g, b) --> a, b, c - abc_to_rgb(a, b, c) --> r, g, b - -All inputs and outputs are triples of floats in the range [0.0...1.0] -(with the exception of I and Q, which covers a slightly larger range). -Inputs outside the valid range may cause exceptions or invalid outputs. 
- -Supported color systems: -RGB: Red, Green, Blue components -YIQ: Luminance, Chrominance (used by composite video signals) -HLS: Hue, Luminance, Saturation -HSV: Hue, Saturation, Value -""" - -# References: -# http://en.wikipedia.org/wiki/YIQ -# http://en.wikipedia.org/wiki/HLS_color_space -# http://en.wikipedia.org/wiki/HSV_color_space - -__all__ = ["rgb_to_yiq","yiq_to_rgb","rgb_to_hls","hls_to_rgb", - "rgb_to_hsv","hsv_to_rgb"] - -# Some floating point constants - -ONE_THIRD = 1.0/3.0 -ONE_SIXTH = 1.0/6.0 -TWO_THIRD = 2.0/3.0 - -# YIQ: used by composite video signals (linear combinations of RGB) -# Y: perceived grey level (0.0 == black, 1.0 == white) -# I, Q: color components - -def rgb_to_yiq(r, g, b): - y = 0.30*r + 0.59*g + 0.11*b - i = 0.60*r - 0.28*g - 0.32*b - q = 0.21*r - 0.52*g + 0.31*b - return (y, i, q) - -def yiq_to_rgb(y, i, q): - r = y + 0.948262*i + 0.624013*q - g = y - 0.276066*i - 0.639810*q - b = y - 1.105450*i + 1.729860*q - if r < 0.0: - r = 0.0 - if g < 0.0: - g = 0.0 - if b < 0.0: - b = 0.0 - if r > 1.0: - r = 1.0 - if g > 1.0: - g = 1.0 - if b > 1.0: - b = 1.0 - return (r, g, b) - - -# HLS: Hue, Luminance, Saturation -# H: position in the spectrum -# L: color lightness -# S: color saturation - -def rgb_to_hls(r, g, b): - maxc = max(r, g, b) - minc = min(r, g, b) - # XXX Can optimize (maxc+minc) and (maxc-minc) - l = (minc+maxc)/2.0 - if minc == maxc: - return 0.0, l, 0.0 - if l <= 0.5: - s = (maxc-minc) / (maxc+minc) - else: - s = (maxc-minc) / (2.0-maxc-minc) - rc = (maxc-r) / (maxc-minc) - gc = (maxc-g) / (maxc-minc) - bc = (maxc-b) / (maxc-minc) - if r == maxc: - h = bc-gc - elif g == maxc: - h = 2.0+rc-bc - else: - h = 4.0+gc-rc - h = (h/6.0) % 1.0 - return h, l, s - -def hls_to_rgb(h, l, s): - if s == 0.0: - return l, l, l - if l <= 0.5: - m2 = l * (1.0+s) - else: - m2 = l+s-(l*s) - m1 = 2.0*l - m2 - return (_v(m1, m2, h+ONE_THIRD), _v(m1, m2, h), _v(m1, m2, h-ONE_THIRD)) - -def _v(m1, m2, hue): - hue = hue % 1.0 - if hue < 
ONE_SIXTH: - return m1 + (m2-m1)*hue*6.0 - if hue < 0.5: - return m2 - if hue < TWO_THIRD: - return m1 + (m2-m1)*(TWO_THIRD-hue)*6.0 - return m1 - - -# HSV: Hue, Saturation, Value -# H: position in the spectrum -# S: color saturation ("purity") -# V: color brightness - -def rgb_to_hsv(r, g, b): - maxc = max(r, g, b) - minc = min(r, g, b) - v = maxc - if minc == maxc: - return 0.0, 0.0, v - s = (maxc-minc) / maxc - rc = (maxc-r) / (maxc-minc) - gc = (maxc-g) / (maxc-minc) - bc = (maxc-b) / (maxc-minc) - if r == maxc: - h = bc-gc - elif g == maxc: - h = 2.0+rc-bc - else: - h = 4.0+gc-rc - h = (h/6.0) % 1.0 - return h, s, v - -def hsv_to_rgb(h, s, v): - if s == 0.0: - return v, v, v - i = int(h*6.0) # XXX assume int() truncates! - f = (h*6.0) - i - p = v*(1.0 - s) - q = v*(1.0 - s*f) - t = v*(1.0 - s*(1.0-f)) - i = i%6 - if i == 0: - return v, t, p - if i == 1: - return q, v, p - if i == 2: - return p, v, t - if i == 3: - return p, q, v - if i == 4: - return t, p, v - if i == 5: - return v, p, q - # Cannot get here diff --git a/python/Lib/commands.py b/python/Lib/commands.py deleted file mode 100755 index d0e8dd5fe9..0000000000 --- a/python/Lib/commands.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Execute shell commands via os.popen() and return status, output. - -Interface summary: - - import commands - - outtext = commands.getoutput(cmd) - (exitstatus, outtext) = commands.getstatusoutput(cmd) - outtext = commands.getstatus(file) # returns output of "ls -ld file" - -A trailing newline is removed from the output string. - -Encapsulates the basic operation: - - pipe = os.popen('{ ' + cmd + '; } 2>&1', 'r') - text = pipe.read() - sts = pipe.close() - - [Note: it would be nice to add functions to interpret the exit status.] 
-""" -from warnings import warnpy3k -warnpy3k("the commands module has been removed in Python 3.0; " - "use the subprocess module instead", stacklevel=2) -del warnpy3k - -__all__ = ["getstatusoutput","getoutput","getstatus"] - -# Module 'commands' -# -# Various tools for executing commands and looking at their output and status. -# -# NB This only works (and is only relevant) for UNIX. - - -# Get 'ls -l' status for an object into a string -# -def getstatus(file): - """Return output of "ls -ld " in a string.""" - import warnings - warnings.warn("commands.getstatus() is deprecated", DeprecationWarning, 2) - return getoutput('ls -ld' + mkarg(file)) - - -# Get the output from a shell command into a string. -# The exit status is ignored; a trailing newline is stripped. -# Assume the command will work with '{ ... ; } 2>&1' around it.. -# -def getoutput(cmd): - """Return output (stdout or stderr) of executing cmd in a shell.""" - return getstatusoutput(cmd)[1] - - -# Ditto but preserving the exit status. -# Returns a pair (sts, output) -# -def getstatusoutput(cmd): - """Return (status, output) of executing cmd in a shell.""" - import os - pipe = os.popen('{ ' + cmd + '; } 2>&1', 'r') - text = pipe.read() - sts = pipe.close() - if sts is None: sts = 0 - if text[-1:] == '\n': text = text[:-1] - return sts, text - - -# Make command argument from directory and pathname (prefix space, add quotes). -# -def mk2arg(head, x): - import os - return mkarg(os.path.join(head, x)) - - -# Make a shell command argument from a string. -# Return a string beginning with a space followed by a shell-quoted -# version of the argument. -# Two strategies: enclose in single quotes if it contains none; -# otherwise, enclose in double quotes and prefix quotable characters -# with backslash. 
-# -def mkarg(x): - if '\'' not in x: - return ' \'' + x + '\'' - s = ' "' - for c in x: - if c in '\\$"`': - s = s + '\\' - s = s + c - s = s + '"' - return s diff --git a/python/Lib/compileall.py b/python/Lib/compileall.py deleted file mode 100755 index 5cfa8bed3f..0000000000 --- a/python/Lib/compileall.py +++ /dev/null @@ -1,227 +0,0 @@ -"""Module/script to byte-compile all .py files to .pyc (or .pyo) files. - -When called as a script with arguments, this compiles the directories -given as arguments recursively; the -l option prevents it from -recursing into directories. - -Without arguments, if compiles all modules on sys.path, without -recursing into subdirectories. (Even though it should do so for -packages -- for now, you'll have to deal with packages separately.) - -See module py_compile for details of the actual byte-compilation. -""" -import os -import sys -import py_compile -import struct -import imp - -__all__ = ["compile_dir","compile_file","compile_path"] - -def compile_dir(dir, maxlevels=10, ddir=None, - force=0, rx=None, quiet=0): - """Byte-compile all modules in the given directory tree. - - Arguments (only dir is required): - - dir: the directory to byte-compile - maxlevels: maximum recursion level (default 10) - ddir: the directory that will be prepended to the path to the - file as it is compiled into each byte-code file. - force: if 1, force compilation, even if timestamps are up-to-date - quiet: if 1, be quiet during compilation - """ - if not quiet: - print 'Listing', dir, '...' 
- try: - names = os.listdir(dir) - except os.error: - print "Can't list", dir - names = [] - names.sort() - success = 1 - for name in names: - fullname = os.path.join(dir, name) - if ddir is not None: - dfile = os.path.join(ddir, name) - else: - dfile = None - if not os.path.isdir(fullname): - if not compile_file(fullname, ddir, force, rx, quiet): - success = 0 - elif maxlevels > 0 and \ - name != os.curdir and name != os.pardir and \ - os.path.isdir(fullname) and \ - not os.path.islink(fullname): - if not compile_dir(fullname, maxlevels - 1, dfile, force, rx, - quiet): - success = 0 - return success - -def compile_file(fullname, ddir=None, force=0, rx=None, quiet=0): - """Byte-compile one file. - - Arguments (only fullname is required): - - fullname: the file to byte-compile - ddir: if given, the directory name compiled in to the - byte-code file. - force: if 1, force compilation, even if timestamps are up-to-date - quiet: if 1, be quiet during compilation - """ - success = 1 - name = os.path.basename(fullname) - if ddir is not None: - dfile = os.path.join(ddir, name) - else: - dfile = None - if rx is not None: - mo = rx.search(fullname) - if mo: - return success - if os.path.isfile(fullname): - head, tail = name[:-3], name[-3:] - if tail == '.py': - if not force: - try: - mtime = int(os.stat(fullname).st_mtime) - expect = struct.pack('<4sl', imp.get_magic(), mtime) - cfile = fullname + (__debug__ and 'c' or 'o') - with open(cfile, 'rb') as chandle: - actual = chandle.read(8) - if expect == actual: - return success - except IOError: - pass - if not quiet: - print 'Compiling', fullname, '...' - try: - ok = py_compile.compile(fullname, None, dfile, True) - except py_compile.PyCompileError,err: - if quiet: - print 'Compiling', fullname, '...' 
- print err.msg - success = 0 - except IOError, e: - print "Sorry", e - success = 0 - else: - if ok == 0: - success = 0 - return success - -def compile_path(skip_curdir=1, maxlevels=0, force=0, quiet=0): - """Byte-compile all module on sys.path. - - Arguments (all optional): - - skip_curdir: if true, skip current directory (default true) - maxlevels: max recursion level (default 0) - force: as for compile_dir() (default 0) - quiet: as for compile_dir() (default 0) - """ - success = 1 - for dir in sys.path: - if (not dir or dir == os.curdir) and skip_curdir: - print 'Skipping current directory' - else: - success = success and compile_dir(dir, maxlevels, None, - force, quiet=quiet) - return success - -def expand_args(args, flist): - """read names in flist and append to args""" - expanded = args[:] - if flist: - try: - if flist == '-': - fd = sys.stdin - else: - fd = open(flist) - while 1: - line = fd.readline() - if not line: - break - expanded.append(line[:-1]) - except IOError: - print "Error reading file list %s" % flist - raise - return expanded - -def main(): - """Script main program.""" - import getopt - try: - opts, args = getopt.getopt(sys.argv[1:], 'lfqd:x:i:') - except getopt.error, msg: - print msg - print "usage: python compileall.py [-l] [-f] [-q] [-d destdir] " \ - "[-x regexp] [-i list] [directory|file ...]" - print - print "arguments: zero or more file and directory names to compile; " \ - "if no arguments given, " - print " defaults to the equivalent of -l sys.path" - print - print "options:" - print "-l: don't recurse into subdirectories" - print "-f: force rebuild even if timestamps are up-to-date" - print "-q: output only error messages" - print "-d destdir: directory to prepend to file paths for use in " \ - "compile-time tracebacks and in" - print " runtime tracebacks in cases where the source " \ - "file is unavailable" - print "-x regexp: skip files matching the regular expression regexp; " \ - "the regexp is searched for" - print " in the 
full path of each file considered for " \ - "compilation" - print "-i file: add all the files and directories listed in file to " \ - "the list considered for" - print ' compilation; if "-", names are read from stdin' - - sys.exit(2) - maxlevels = 10 - ddir = None - force = 0 - quiet = 0 - rx = None - flist = None - for o, a in opts: - if o == '-l': maxlevels = 0 - if o == '-d': ddir = a - if o == '-f': force = 1 - if o == '-q': quiet = 1 - if o == '-x': - import re - rx = re.compile(a) - if o == '-i': flist = a - if ddir: - if len(args) != 1 and not os.path.isdir(args[0]): - print "-d destdir require exactly one directory argument" - sys.exit(2) - success = 1 - try: - if args or flist: - try: - if flist: - args = expand_args(args, flist) - except IOError: - success = 0 - if success: - for arg in args: - if os.path.isdir(arg): - if not compile_dir(arg, maxlevels, ddir, - force, rx, quiet): - success = 0 - else: - if not compile_file(arg, ddir, force, rx, quiet): - success = 0 - else: - success = compile_path() - except KeyboardInterrupt: - print "\n[interrupted]" - success = 0 - return success - -if __name__ == '__main__': - exit_status = int(not main()) - sys.exit(exit_status) diff --git a/python/Lib/compiler/__init__.py b/python/Lib/compiler/__init__.py deleted file mode 100755 index 2a6f64fa50..0000000000 --- a/python/Lib/compiler/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Package for parsing and compiling Python source code - -There are several functions defined at the top level that are imported -from modules contained in the package. - -parse(buf, mode="exec") -> AST - Converts a string containing Python source code to an abstract - syntax tree (AST). The AST is defined in compiler.ast. - -parseFile(path) -> AST - The same as parse(open(path)) - -walk(ast, visitor, verbose=None) - Does a pre-order walk over the ast using the visitor instance. - See compiler.visitor for details. 
- -compile(source, filename, mode, flags=None, dont_inherit=None) - Returns a code object. A replacement for the builtin compile() function. - -compileFile(filename) - Generates a .pyc file by compiling filename. -""" - -import warnings - -warnings.warn("The compiler package is deprecated and removed in Python 3.x.", - DeprecationWarning, stacklevel=2) - -from compiler.transformer import parse, parseFile -from compiler.visitor import walk -from compiler.pycodegen import compile, compileFile diff --git a/python/Lib/compiler/ast.py b/python/Lib/compiler/ast.py deleted file mode 100755 index 4c3fc161d3..0000000000 --- a/python/Lib/compiler/ast.py +++ /dev/null @@ -1,1419 +0,0 @@ -"""Python abstract syntax node definitions - -This file is automatically generated by Tools/compiler/astgen.py -""" -from compiler.consts import CO_VARARGS, CO_VARKEYWORDS - -def flatten(seq): - l = [] - for elt in seq: - t = type(elt) - if t is tuple or t is list: - for elt2 in flatten(elt): - l.append(elt2) - else: - l.append(elt) - return l - -def flatten_nodes(seq): - return [n for n in flatten(seq) if isinstance(n, Node)] - -nodes = {} - -class Node: - """Abstract base class for ast nodes.""" - def getChildren(self): - pass # implemented by subclasses - def __iter__(self): - for n in self.getChildren(): - yield n - def asList(self): # for backwards compatibility - return self.getChildren() - def getChildNodes(self): - pass # implemented by subclasses - -class EmptyNode(Node): - pass - -class Expression(Node): - # Expression is an artificial node class to support "eval" - nodes["expression"] = "Expression" - def __init__(self, node): - self.node = node - - def getChildren(self): - return self.node, - - def getChildNodes(self): - return self.node, - - def __repr__(self): - return "Expression(%s)" % (repr(self.node)) - -class Add(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - 
return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "Add((%s, %s))" % (repr(self.left), repr(self.right)) - -class And(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "And(%s)" % (repr(self.nodes),) - -class AssAttr(Node): - def __init__(self, expr, attrname, flags, lineno=None): - self.expr = expr - self.attrname = attrname - self.flags = flags - self.lineno = lineno - - def getChildren(self): - return self.expr, self.attrname, self.flags - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "AssAttr(%s, %s, %s)" % (repr(self.expr), repr(self.attrname), repr(self.flags)) - -class AssList(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "AssList(%s)" % (repr(self.nodes),) - -class AssName(Node): - def __init__(self, name, flags, lineno=None): - self.name = name - self.flags = flags - self.lineno = lineno - - def getChildren(self): - return self.name, self.flags - - def getChildNodes(self): - return () - - def __repr__(self): - return "AssName(%s, %s)" % (repr(self.name), repr(self.flags)) - -class AssTuple(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "AssTuple(%s)" % (repr(self.nodes),) - -class Assert(Node): - 
def __init__(self, test, fail, lineno=None): - self.test = test - self.fail = fail - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.test) - children.append(self.fail) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.test) - if self.fail is not None: - nodelist.append(self.fail) - return tuple(nodelist) - - def __repr__(self): - return "Assert(%s, %s)" % (repr(self.test), repr(self.fail)) - -class Assign(Node): - def __init__(self, nodes, expr, lineno=None): - self.nodes = nodes - self.expr = expr - self.lineno = lineno - - def getChildren(self): - children = [] - children.extend(flatten(self.nodes)) - children.append(self.expr) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - nodelist.append(self.expr) - return tuple(nodelist) - - def __repr__(self): - return "Assign(%s, %s)" % (repr(self.nodes), repr(self.expr)) - -class AugAssign(Node): - def __init__(self, node, op, expr, lineno=None): - self.node = node - self.op = op - self.expr = expr - self.lineno = lineno - - def getChildren(self): - return self.node, self.op, self.expr - - def getChildNodes(self): - return self.node, self.expr - - def __repr__(self): - return "AugAssign(%s, %s, %s)" % (repr(self.node), repr(self.op), repr(self.expr)) - -class Backquote(Node): - def __init__(self, expr, lineno=None): - self.expr = expr - self.lineno = lineno - - def getChildren(self): - return self.expr, - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "Backquote(%s)" % (repr(self.expr),) - -class Bitand(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Bitand(%s)" % (repr(self.nodes),) - 
-class Bitor(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Bitor(%s)" % (repr(self.nodes),) - -class Bitxor(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Bitxor(%s)" % (repr(self.nodes),) - -class Break(Node): - def __init__(self, lineno=None): - self.lineno = lineno - - def getChildren(self): - return () - - def getChildNodes(self): - return () - - def __repr__(self): - return "Break()" - -class CallFunc(Node): - def __init__(self, node, args, star_args = None, dstar_args = None, lineno=None): - self.node = node - self.args = args - self.star_args = star_args - self.dstar_args = dstar_args - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.node) - children.extend(flatten(self.args)) - children.append(self.star_args) - children.append(self.dstar_args) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.node) - nodelist.extend(flatten_nodes(self.args)) - if self.star_args is not None: - nodelist.append(self.star_args) - if self.dstar_args is not None: - nodelist.append(self.dstar_args) - return tuple(nodelist) - - def __repr__(self): - return "CallFunc(%s, %s, %s, %s)" % (repr(self.node), repr(self.args), repr(self.star_args), repr(self.dstar_args)) - -class Class(Node): - def __init__(self, name, bases, doc, code, decorators = None, lineno=None): - self.name = name - self.bases = bases - self.doc = doc - self.code = code - self.decorators = decorators - self.lineno = lineno - - 
def getChildren(self): - children = [] - children.append(self.name) - children.extend(flatten(self.bases)) - children.append(self.doc) - children.append(self.code) - children.append(self.decorators) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.bases)) - nodelist.append(self.code) - if self.decorators is not None: - nodelist.append(self.decorators) - return tuple(nodelist) - - def __repr__(self): - return "Class(%s, %s, %s, %s, %s)" % (repr(self.name), repr(self.bases), repr(self.doc), repr(self.code), repr(self.decorators)) - -class Compare(Node): - def __init__(self, expr, ops, lineno=None): - self.expr = expr - self.ops = ops - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.extend(flatten(self.ops)) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.expr) - nodelist.extend(flatten_nodes(self.ops)) - return tuple(nodelist) - - def __repr__(self): - return "Compare(%s, %s)" % (repr(self.expr), repr(self.ops)) - -class Const(Node): - def __init__(self, value, lineno=None): - self.value = value - self.lineno = lineno - - def getChildren(self): - return self.value, - - def getChildNodes(self): - return () - - def __repr__(self): - return "Const(%s)" % (repr(self.value),) - -class Continue(Node): - def __init__(self, lineno=None): - self.lineno = lineno - - def getChildren(self): - return () - - def getChildNodes(self): - return () - - def __repr__(self): - return "Continue()" - -class Decorators(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Decorators(%s)" % (repr(self.nodes),) - -class Dict(Node): - def __init__(self, items, lineno=None): - self.items = 
items - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.items)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.items)) - return tuple(nodelist) - - def __repr__(self): - return "Dict(%s)" % (repr(self.items),) - -class Discard(Node): - def __init__(self, expr, lineno=None): - self.expr = expr - self.lineno = lineno - - def getChildren(self): - return self.expr, - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "Discard(%s)" % (repr(self.expr),) - -class Div(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "Div((%s, %s))" % (repr(self.left), repr(self.right)) - -class Ellipsis(Node): - def __init__(self, lineno=None): - self.lineno = lineno - - def getChildren(self): - return () - - def getChildNodes(self): - return () - - def __repr__(self): - return "Ellipsis()" - -class Exec(Node): - def __init__(self, expr, locals, globals, lineno=None): - self.expr = expr - self.locals = locals - self.globals = globals - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.append(self.locals) - children.append(self.globals) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.expr) - if self.locals is not None: - nodelist.append(self.locals) - if self.globals is not None: - nodelist.append(self.globals) - return tuple(nodelist) - - def __repr__(self): - return "Exec(%s, %s, %s)" % (repr(self.expr), repr(self.locals), repr(self.globals)) - -class FloorDiv(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def 
getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "FloorDiv((%s, %s))" % (repr(self.left), repr(self.right)) - -class For(Node): - def __init__(self, assign, list, body, else_, lineno=None): - self.assign = assign - self.list = list - self.body = body - self.else_ = else_ - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.assign) - children.append(self.list) - children.append(self.body) - children.append(self.else_) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.assign) - nodelist.append(self.list) - nodelist.append(self.body) - if self.else_ is not None: - nodelist.append(self.else_) - return tuple(nodelist) - - def __repr__(self): - return "For(%s, %s, %s, %s)" % (repr(self.assign), repr(self.list), repr(self.body), repr(self.else_)) - -class From(Node): - def __init__(self, modname, names, level, lineno=None): - self.modname = modname - self.names = names - self.level = level - self.lineno = lineno - - def getChildren(self): - return self.modname, self.names, self.level - - def getChildNodes(self): - return () - - def __repr__(self): - return "From(%s, %s, %s)" % (repr(self.modname), repr(self.names), repr(self.level)) - -class Function(Node): - def __init__(self, decorators, name, argnames, defaults, flags, doc, code, lineno=None): - self.decorators = decorators - self.name = name - self.argnames = argnames - self.defaults = defaults - self.flags = flags - self.doc = doc - self.code = code - self.lineno = lineno - self.varargs = self.kwargs = None - if flags & CO_VARARGS: - self.varargs = 1 - if flags & CO_VARKEYWORDS: - self.kwargs = 1 - - - def getChildren(self): - children = [] - children.append(self.decorators) - children.append(self.name) - children.append(self.argnames) - children.extend(flatten(self.defaults)) - children.append(self.flags) - children.append(self.doc) - children.append(self.code) - return tuple(children) - - def 
getChildNodes(self): - nodelist = [] - if self.decorators is not None: - nodelist.append(self.decorators) - nodelist.extend(flatten_nodes(self.defaults)) - nodelist.append(self.code) - return tuple(nodelist) - - def __repr__(self): - return "Function(%s, %s, %s, %s, %s, %s, %s)" % (repr(self.decorators), repr(self.name), repr(self.argnames), repr(self.defaults), repr(self.flags), repr(self.doc), repr(self.code)) - -class GenExpr(Node): - def __init__(self, code, lineno=None): - self.code = code - self.lineno = lineno - self.argnames = ['.0'] - self.varargs = self.kwargs = None - - - def getChildren(self): - return self.code, - - def getChildNodes(self): - return self.code, - - def __repr__(self): - return "GenExpr(%s)" % (repr(self.code),) - -class GenExprFor(Node): - def __init__(self, assign, iter, ifs, lineno=None): - self.assign = assign - self.iter = iter - self.ifs = ifs - self.lineno = lineno - self.is_outmost = False - - def getChildren(self): - children = [] - children.append(self.assign) - children.append(self.iter) - children.extend(flatten(self.ifs)) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.assign) - nodelist.append(self.iter) - nodelist.extend(flatten_nodes(self.ifs)) - return tuple(nodelist) - - def __repr__(self): - return "GenExprFor(%s, %s, %s)" % (repr(self.assign), repr(self.iter), repr(self.ifs)) - -class GenExprIf(Node): - def __init__(self, test, lineno=None): - self.test = test - self.lineno = lineno - - def getChildren(self): - return self.test, - - def getChildNodes(self): - return self.test, - - def __repr__(self): - return "GenExprIf(%s)" % (repr(self.test),) - -class GenExprInner(Node): - def __init__(self, expr, quals, lineno=None): - self.expr = expr - self.quals = quals - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.extend(flatten(self.quals)) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - 
nodelist.append(self.expr) - nodelist.extend(flatten_nodes(self.quals)) - return tuple(nodelist) - - def __repr__(self): - return "GenExprInner(%s, %s)" % (repr(self.expr), repr(self.quals)) - -class Getattr(Node): - def __init__(self, expr, attrname, lineno=None): - self.expr = expr - self.attrname = attrname - self.lineno = lineno - - def getChildren(self): - return self.expr, self.attrname - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "Getattr(%s, %s)" % (repr(self.expr), repr(self.attrname)) - -class Global(Node): - def __init__(self, names, lineno=None): - self.names = names - self.lineno = lineno - - def getChildren(self): - return self.names, - - def getChildNodes(self): - return () - - def __repr__(self): - return "Global(%s)" % (repr(self.names),) - -class If(Node): - def __init__(self, tests, else_, lineno=None): - self.tests = tests - self.else_ = else_ - self.lineno = lineno - - def getChildren(self): - children = [] - children.extend(flatten(self.tests)) - children.append(self.else_) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.tests)) - if self.else_ is not None: - nodelist.append(self.else_) - return tuple(nodelist) - - def __repr__(self): - return "If(%s, %s)" % (repr(self.tests), repr(self.else_)) - -class IfExp(Node): - def __init__(self, test, then, else_, lineno=None): - self.test = test - self.then = then - self.else_ = else_ - self.lineno = lineno - - def getChildren(self): - return self.test, self.then, self.else_ - - def getChildNodes(self): - return self.test, self.then, self.else_ - - def __repr__(self): - return "IfExp(%s, %s, %s)" % (repr(self.test), repr(self.then), repr(self.else_)) - -class Import(Node): - def __init__(self, names, lineno=None): - self.names = names - self.lineno = lineno - - def getChildren(self): - return self.names, - - def getChildNodes(self): - return () - - def __repr__(self): - return "Import(%s)" % 
(repr(self.names),) - -class Invert(Node): - def __init__(self, expr, lineno=None): - self.expr = expr - self.lineno = lineno - - def getChildren(self): - return self.expr, - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "Invert(%s)" % (repr(self.expr),) - -class Keyword(Node): - def __init__(self, name, expr, lineno=None): - self.name = name - self.expr = expr - self.lineno = lineno - - def getChildren(self): - return self.name, self.expr - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "Keyword(%s, %s)" % (repr(self.name), repr(self.expr)) - -class Lambda(Node): - def __init__(self, argnames, defaults, flags, code, lineno=None): - self.argnames = argnames - self.defaults = defaults - self.flags = flags - self.code = code - self.lineno = lineno - self.varargs = self.kwargs = None - if flags & CO_VARARGS: - self.varargs = 1 - if flags & CO_VARKEYWORDS: - self.kwargs = 1 - - - def getChildren(self): - children = [] - children.append(self.argnames) - children.extend(flatten(self.defaults)) - children.append(self.flags) - children.append(self.code) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.defaults)) - nodelist.append(self.code) - return tuple(nodelist) - - def __repr__(self): - return "Lambda(%s, %s, %s, %s)" % (repr(self.argnames), repr(self.defaults), repr(self.flags), repr(self.code)) - -class LeftShift(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "LeftShift((%s, %s))" % (repr(self.left), repr(self.right)) - -class List(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): 
- nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "List(%s)" % (repr(self.nodes),) - -class ListComp(Node): - def __init__(self, expr, quals, lineno=None): - self.expr = expr - self.quals = quals - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.extend(flatten(self.quals)) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.expr) - nodelist.extend(flatten_nodes(self.quals)) - return tuple(nodelist) - - def __repr__(self): - return "ListComp(%s, %s)" % (repr(self.expr), repr(self.quals)) - -class ListCompFor(Node): - def __init__(self, assign, list, ifs, lineno=None): - self.assign = assign - self.list = list - self.ifs = ifs - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.assign) - children.append(self.list) - children.extend(flatten(self.ifs)) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.assign) - nodelist.append(self.list) - nodelist.extend(flatten_nodes(self.ifs)) - return tuple(nodelist) - - def __repr__(self): - return "ListCompFor(%s, %s, %s)" % (repr(self.assign), repr(self.list), repr(self.ifs)) - -class ListCompIf(Node): - def __init__(self, test, lineno=None): - self.test = test - self.lineno = lineno - - def getChildren(self): - return self.test, - - def getChildNodes(self): - return self.test, - - def __repr__(self): - return "ListCompIf(%s)" % (repr(self.test),) - -class SetComp(Node): - def __init__(self, expr, quals, lineno=None): - self.expr = expr - self.quals = quals - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.extend(flatten(self.quals)) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.expr) - nodelist.extend(flatten_nodes(self.quals)) - return tuple(nodelist) - - def __repr__(self): - return 
"SetComp(%s, %s)" % (repr(self.expr), repr(self.quals)) - -class DictComp(Node): - def __init__(self, key, value, quals, lineno=None): - self.key = key - self.value = value - self.quals = quals - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.key) - children.append(self.value) - children.extend(flatten(self.quals)) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.key) - nodelist.append(self.value) - nodelist.extend(flatten_nodes(self.quals)) - return tuple(nodelist) - - def __repr__(self): - return "DictComp(%s, %s, %s)" % (repr(self.key), repr(self.value), repr(self.quals)) - -class Mod(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "Mod((%s, %s))" % (repr(self.left), repr(self.right)) - -class Module(Node): - def __init__(self, doc, node, lineno=None): - self.doc = doc - self.node = node - self.lineno = lineno - - def getChildren(self): - return self.doc, self.node - - def getChildNodes(self): - return self.node, - - def __repr__(self): - return "Module(%s, %s)" % (repr(self.doc), repr(self.node)) - -class Mul(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "Mul((%s, %s))" % (repr(self.left), repr(self.right)) - -class Name(Node): - def __init__(self, name, lineno=None): - self.name = name - self.lineno = lineno - - def getChildren(self): - return self.name, - - def getChildNodes(self): - return () - - def __repr__(self): - return "Name(%s)" % (repr(self.name),) - -class Not(Node): - def __init__(self, expr, 
lineno=None): - self.expr = expr - self.lineno = lineno - - def getChildren(self): - return self.expr, - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "Not(%s)" % (repr(self.expr),) - -class Or(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Or(%s)" % (repr(self.nodes),) - -class Pass(Node): - def __init__(self, lineno=None): - self.lineno = lineno - - def getChildren(self): - return () - - def getChildNodes(self): - return () - - def __repr__(self): - return "Pass()" - -class Power(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "Power((%s, %s))" % (repr(self.left), repr(self.right)) - -class Print(Node): - def __init__(self, nodes, dest, lineno=None): - self.nodes = nodes - self.dest = dest - self.lineno = lineno - - def getChildren(self): - children = [] - children.extend(flatten(self.nodes)) - children.append(self.dest) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - if self.dest is not None: - nodelist.append(self.dest) - return tuple(nodelist) - - def __repr__(self): - return "Print(%s, %s)" % (repr(self.nodes), repr(self.dest)) - -class Printnl(Node): - def __init__(self, nodes, dest, lineno=None): - self.nodes = nodes - self.dest = dest - self.lineno = lineno - - def getChildren(self): - children = [] - children.extend(flatten(self.nodes)) - children.append(self.dest) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - 
nodelist.extend(flatten_nodes(self.nodes)) - if self.dest is not None: - nodelist.append(self.dest) - return tuple(nodelist) - - def __repr__(self): - return "Printnl(%s, %s)" % (repr(self.nodes), repr(self.dest)) - -class Raise(Node): - def __init__(self, expr1, expr2, expr3, lineno=None): - self.expr1 = expr1 - self.expr2 = expr2 - self.expr3 = expr3 - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr1) - children.append(self.expr2) - children.append(self.expr3) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - if self.expr1 is not None: - nodelist.append(self.expr1) - if self.expr2 is not None: - nodelist.append(self.expr2) - if self.expr3 is not None: - nodelist.append(self.expr3) - return tuple(nodelist) - - def __repr__(self): - return "Raise(%s, %s, %s)" % (repr(self.expr1), repr(self.expr2), repr(self.expr3)) - -class Return(Node): - def __init__(self, value, lineno=None): - self.value = value - self.lineno = lineno - - def getChildren(self): - return self.value, - - def getChildNodes(self): - return self.value, - - def __repr__(self): - return "Return(%s)" % (repr(self.value),) - -class RightShift(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "RightShift((%s, %s))" % (repr(self.left), repr(self.right)) - -class Set(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Set(%s)" % (repr(self.nodes),) - -class Slice(Node): - def __init__(self, expr, flags, lower, upper, lineno=None): - self.expr = expr - self.flags = 
flags - self.lower = lower - self.upper = upper - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.append(self.flags) - children.append(self.lower) - children.append(self.upper) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.expr) - if self.lower is not None: - nodelist.append(self.lower) - if self.upper is not None: - nodelist.append(self.upper) - return tuple(nodelist) - - def __repr__(self): - return "Slice(%s, %s, %s, %s)" % (repr(self.expr), repr(self.flags), repr(self.lower), repr(self.upper)) - -class Sliceobj(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Sliceobj(%s)" % (repr(self.nodes),) - -class Stmt(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Stmt(%s)" % (repr(self.nodes),) - -class Sub(Node): - def __init__(self, leftright, lineno=None): - self.left = leftright[0] - self.right = leftright[1] - self.lineno = lineno - - def getChildren(self): - return self.left, self.right - - def getChildNodes(self): - return self.left, self.right - - def __repr__(self): - return "Sub((%s, %s))" % (repr(self.left), repr(self.right)) - -class Subscript(Node): - def __init__(self, expr, flags, subs, lineno=None): - self.expr = expr - self.flags = flags - self.subs = subs - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.append(self.flags) - children.extend(flatten(self.subs)) - return tuple(children) - - def 
getChildNodes(self): - nodelist = [] - nodelist.append(self.expr) - nodelist.extend(flatten_nodes(self.subs)) - return tuple(nodelist) - - def __repr__(self): - return "Subscript(%s, %s, %s)" % (repr(self.expr), repr(self.flags), repr(self.subs)) - -class TryExcept(Node): - def __init__(self, body, handlers, else_, lineno=None): - self.body = body - self.handlers = handlers - self.else_ = else_ - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.body) - children.extend(flatten(self.handlers)) - children.append(self.else_) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.body) - nodelist.extend(flatten_nodes(self.handlers)) - if self.else_ is not None: - nodelist.append(self.else_) - return tuple(nodelist) - - def __repr__(self): - return "TryExcept(%s, %s, %s)" % (repr(self.body), repr(self.handlers), repr(self.else_)) - -class TryFinally(Node): - def __init__(self, body, final, lineno=None): - self.body = body - self.final = final - self.lineno = lineno - - def getChildren(self): - return self.body, self.final - - def getChildNodes(self): - return self.body, self.final - - def __repr__(self): - return "TryFinally(%s, %s)" % (repr(self.body), repr(self.final)) - -class Tuple(Node): - def __init__(self, nodes, lineno=None): - self.nodes = nodes - self.lineno = lineno - - def getChildren(self): - return tuple(flatten(self.nodes)) - - def getChildNodes(self): - nodelist = [] - nodelist.extend(flatten_nodes(self.nodes)) - return tuple(nodelist) - - def __repr__(self): - return "Tuple(%s)" % (repr(self.nodes),) - -class UnaryAdd(Node): - def __init__(self, expr, lineno=None): - self.expr = expr - self.lineno = lineno - - def getChildren(self): - return self.expr, - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "UnaryAdd(%s)" % (repr(self.expr),) - -class UnarySub(Node): - def __init__(self, expr, lineno=None): - self.expr = expr - self.lineno = lineno - - 
def getChildren(self): - return self.expr, - - def getChildNodes(self): - return self.expr, - - def __repr__(self): - return "UnarySub(%s)" % (repr(self.expr),) - -class While(Node): - def __init__(self, test, body, else_, lineno=None): - self.test = test - self.body = body - self.else_ = else_ - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.test) - children.append(self.body) - children.append(self.else_) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.test) - nodelist.append(self.body) - if self.else_ is not None: - nodelist.append(self.else_) - return tuple(nodelist) - - def __repr__(self): - return "While(%s, %s, %s)" % (repr(self.test), repr(self.body), repr(self.else_)) - -class With(Node): - def __init__(self, expr, vars, body, lineno=None): - self.expr = expr - self.vars = vars - self.body = body - self.lineno = lineno - - def getChildren(self): - children = [] - children.append(self.expr) - children.append(self.vars) - children.append(self.body) - return tuple(children) - - def getChildNodes(self): - nodelist = [] - nodelist.append(self.expr) - if self.vars is not None: - nodelist.append(self.vars) - nodelist.append(self.body) - return tuple(nodelist) - - def __repr__(self): - return "With(%s, %s, %s)" % (repr(self.expr), repr(self.vars), repr(self.body)) - -class Yield(Node): - def __init__(self, value, lineno=None): - self.value = value - self.lineno = lineno - - def getChildren(self): - return self.value, - - def getChildNodes(self): - return self.value, - - def __repr__(self): - return "Yield(%s)" % (repr(self.value),) - -for name, obj in globals().items(): - if isinstance(obj, type) and issubclass(obj, Node): - nodes[name.lower()] = obj diff --git a/python/Lib/compiler/consts.py b/python/Lib/compiler/consts.py deleted file mode 100755 index c60b1d0b4f..0000000000 --- a/python/Lib/compiler/consts.py +++ /dev/null @@ -1,23 +0,0 @@ -# operation flags -OP_ASSIGN = 
'OP_ASSIGN' -OP_DELETE = 'OP_DELETE' -OP_APPLY = 'OP_APPLY' - -SC_LOCAL = 1 -SC_GLOBAL_IMPLICIT = 2 -SC_GLOBAL_EXPLICIT = 3 -SC_FREE = 4 -SC_CELL = 5 -SC_UNKNOWN = 6 - -CO_OPTIMIZED = 0x0001 -CO_NEWLOCALS = 0x0002 -CO_VARARGS = 0x0004 -CO_VARKEYWORDS = 0x0008 -CO_NESTED = 0x0010 -CO_GENERATOR = 0x0020 -CO_GENERATOR_ALLOWED = 0 -CO_FUTURE_DIVISION = 0x2000 -CO_FUTURE_ABSIMPORT = 0x4000 -CO_FUTURE_WITH_STATEMENT = 0x8000 -CO_FUTURE_PRINT_FUNCTION = 0x10000 diff --git a/python/Lib/compiler/future.py b/python/Lib/compiler/future.py deleted file mode 100755 index fd5e5dfb37..0000000000 --- a/python/Lib/compiler/future.py +++ /dev/null @@ -1,74 +0,0 @@ -"""Parser for future statements - -""" - -from compiler import ast, walk - -def is_future(stmt): - """Return true if statement is a well-formed future statement""" - if not isinstance(stmt, ast.From): - return 0 - if stmt.modname == "__future__": - return 1 - else: - return 0 - -class FutureParser: - - features = ("nested_scopes", "generators", "division", - "absolute_import", "with_statement", "print_function", - "unicode_literals") - - def __init__(self): - self.found = {} # set - - def visitModule(self, node): - stmt = node.node - for s in stmt.nodes: - if not self.check_stmt(s): - break - - def check_stmt(self, stmt): - if is_future(stmt): - for name, asname in stmt.names: - if name in self.features: - self.found[name] = 1 - else: - raise SyntaxError, \ - "future feature %s is not defined" % name - stmt.valid_future = 1 - return 1 - return 0 - - def get_features(self): - """Return list of features enabled by future statements""" - return self.found.keys() - -class BadFutureParser: - """Check for invalid future statements""" - - def visitFrom(self, node): - if hasattr(node, 'valid_future'): - return - if node.modname != "__future__": - return - raise SyntaxError, "invalid future statement " + repr(node) - -def find_futures(node): - p1 = FutureParser() - p2 = BadFutureParser() - walk(node, p1) - walk(node, p2) - return 
p1.get_features() - -if __name__ == "__main__": - import sys - from compiler import parseFile, walk - - for file in sys.argv[1:]: - print file - tree = parseFile(file) - v = FutureParser() - walk(tree, v) - print v.found - print diff --git a/python/Lib/compiler/misc.py b/python/Lib/compiler/misc.py deleted file mode 100755 index 588c7fbd5a..0000000000 --- a/python/Lib/compiler/misc.py +++ /dev/null @@ -1,73 +0,0 @@ - -def flatten(tup): - elts = [] - for elt in tup: - if isinstance(elt, tuple): - elts = elts + flatten(elt) - else: - elts.append(elt) - return elts - -class Set: - def __init__(self): - self.elts = {} - def __len__(self): - return len(self.elts) - def __contains__(self, elt): - return elt in self.elts - def add(self, elt): - self.elts[elt] = elt - def elements(self): - return self.elts.keys() - def has_elt(self, elt): - return elt in self.elts - def remove(self, elt): - del self.elts[elt] - def copy(self): - c = Set() - c.elts.update(self.elts) - return c - -class Stack: - def __init__(self): - self.stack = [] - self.pop = self.stack.pop - def __len__(self): - return len(self.stack) - def push(self, elt): - self.stack.append(elt) - def top(self): - return self.stack[-1] - def __getitem__(self, index): # needed by visitContinue() - return self.stack[index] - -MANGLE_LEN = 256 # magic constant from compile.c - -def mangle(name, klass): - if not name.startswith('__'): - return name - if len(name) + 2 >= MANGLE_LEN: - return name - if name.endswith('__'): - return name - try: - i = 0 - while klass[i] == '_': - i = i + 1 - except IndexError: - return name - klass = klass[i:] - - tlen = len(klass) + len(name) - if tlen > MANGLE_LEN: - klass = klass[:MANGLE_LEN-tlen] - - return "_%s%s" % (klass, name) - -def set_filename(filename, tree): - """Set the filename attribute to filename on every node in tree""" - worklist = [tree] - while worklist: - node = worklist.pop(0) - node.filename = filename - worklist.extend(node.getChildNodes()) diff --git 
a/python/Lib/compiler/pyassem.py b/python/Lib/compiler/pyassem.py deleted file mode 100755 index f52f7d079f..0000000000 --- a/python/Lib/compiler/pyassem.py +++ /dev/null @@ -1,763 +0,0 @@ -"""A flow graph representation for Python bytecode""" - -import dis -import types -import sys - -from compiler import misc -from compiler.consts \ - import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS - -class FlowGraph: - def __init__(self): - self.current = self.entry = Block() - self.exit = Block("exit") - self.blocks = misc.Set() - self.blocks.add(self.entry) - self.blocks.add(self.exit) - - def startBlock(self, block): - if self._debug: - if self.current: - print "end", repr(self.current) - print " next", self.current.next - print " prev", self.current.prev - print " ", self.current.get_children() - print repr(block) - self.current = block - - def nextBlock(self, block=None): - # XXX think we need to specify when there is implicit transfer - # from one block to the next. might be better to represent this - # with explicit JUMP_ABSOLUTE instructions that are optimized - # out when they are unnecessary. - # - # I think this strategy works: each block has a child - # designated as "next" which is returned as the last of the - # children. because the nodes in a graph are emitted in - # reverse post order, the "next" block will always be emitted - # immediately after its parent. - # Worry: maintaining this invariant could be tricky - if block is None: - block = self.newBlock() - - # Note: If the current block ends with an unconditional control - # transfer, then it is techically incorrect to add an implicit - # transfer to the block graph. Doing so results in code generation - # for unreachable blocks. That doesn't appear to be very common - # with Python code and since the built-in compiler doesn't optimize - # it out we don't either. 
- self.current.addNext(block) - self.startBlock(block) - - def newBlock(self): - b = Block() - self.blocks.add(b) - return b - - def startExitBlock(self): - self.startBlock(self.exit) - - _debug = 0 - - def _enable_debug(self): - self._debug = 1 - - def _disable_debug(self): - self._debug = 0 - - def emit(self, *inst): - if self._debug: - print "\t", inst - if len(inst) == 2 and isinstance(inst[1], Block): - self.current.addOutEdge(inst[1]) - self.current.emit(inst) - - def getBlocksInOrder(self): - """Return the blocks in reverse postorder - - i.e. each node appears before all of its successors - """ - order = order_blocks(self.entry, self.exit) - return order - - def getBlocks(self): - return self.blocks.elements() - - def getRoot(self): - """Return nodes appropriate for use with dominator""" - return self.entry - - def getContainedGraphs(self): - l = [] - for b in self.getBlocks(): - l.extend(b.getContainedGraphs()) - return l - - -def order_blocks(start_block, exit_block): - """Order blocks so that they are emitted in the right order""" - # Rules: - # - when a block has a next block, the next block must be emitted just after - # - when a block has followers (relative jumps), it must be emitted before - # them - # - all reachable blocks must be emitted - order = [] - - # Find all the blocks to be emitted. - remaining = set() - todo = [start_block] - while todo: - b = todo.pop() - if b in remaining: - continue - remaining.add(b) - for c in b.get_children(): - if c not in remaining: - todo.append(c) - - # A block is dominated by another block if that block must be emitted - # before it. - dominators = {} - for b in remaining: - if __debug__ and b.next: - assert b is b.next[0].prev[0], (b, b.next) - # Make sure every block appears in dominators, even if no - # other block must precede it. 
- dominators.setdefault(b, set()) - # preceding blocks dominate following blocks - for c in b.get_followers(): - while 1: - dominators.setdefault(c, set()).add(b) - # Any block that has a next pointer leading to c is also - # dominated because the whole chain will be emitted at once. - # Walk backwards and add them all. - if c.prev and c.prev[0] is not b: - c = c.prev[0] - else: - break - - def find_next(): - # Find a block that can be emitted next. - for b in remaining: - for c in dominators[b]: - if c in remaining: - break # can't emit yet, dominated by a remaining block - else: - return b - assert 0, 'circular dependency, cannot find next block' - - b = start_block - while 1: - order.append(b) - remaining.discard(b) - if b.next: - b = b.next[0] - continue - elif b is not exit_block and not b.has_unconditional_transfer(): - order.append(exit_block) - if not remaining: - break - b = find_next() - return order - - -class Block: - _count = 0 - - def __init__(self, label=''): - self.insts = [] - self.outEdges = set() - self.label = label - self.bid = Block._count - self.next = [] - self.prev = [] - Block._count = Block._count + 1 - - def __repr__(self): - if self.label: - return "" % (self.label, self.bid) - else: - return "" % (self.bid) - - def __str__(self): - insts = map(str, self.insts) - return "" % (self.label, self.bid, - '\n'.join(insts)) - - def emit(self, inst): - op = inst[0] - self.insts.append(inst) - - def getInstructions(self): - return self.insts - - def addOutEdge(self, block): - self.outEdges.add(block) - - def addNext(self, block): - self.next.append(block) - assert len(self.next) == 1, map(str, self.next) - block.prev.append(self) - assert len(block.prev) == 1, map(str, block.prev) - - _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', - 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP', - ) - - def has_unconditional_transfer(self): - """Returns True if there is an unconditional transfer to an other block - at the end of this block. 
This means there is no risk for the bytecode - executer to go past this block's bytecode.""" - try: - op, arg = self.insts[-1] - except (IndexError, ValueError): - return - return op in self._uncond_transfer - - def get_children(self): - return list(self.outEdges) + self.next - - def get_followers(self): - """Get the whole list of followers, including the next block.""" - followers = set(self.next) - # Blocks that must be emitted *after* this one, because of - # bytecode offsets (e.g. relative jumps) pointing to them. - for inst in self.insts: - if inst[0] in PyFlowGraph.hasjrel: - followers.add(inst[1]) - return followers - - def getContainedGraphs(self): - """Return all graphs contained within this block. - - For example, a MAKE_FUNCTION block will contain a reference to - the graph for the function body. - """ - contained = [] - for inst in self.insts: - if len(inst) == 1: - continue - op = inst[1] - if hasattr(op, 'graph'): - contained.append(op.graph) - return contained - -# flags for code objects - -# the FlowGraph is transformed in place; it exists in one of these states -RAW = "RAW" -FLAT = "FLAT" -CONV = "CONV" -DONE = "DONE" - -class PyFlowGraph(FlowGraph): - super_init = FlowGraph.__init__ - - def __init__(self, name, filename, args=(), optimized=0, klass=None): - self.super_init() - self.name = name - self.filename = filename - self.docstring = None - self.args = args # XXX - self.argcount = getArgCount(args) - self.klass = klass - if optimized: - self.flags = CO_OPTIMIZED | CO_NEWLOCALS - else: - self.flags = 0 - self.consts = [] - self.names = [] - # Free variables found by the symbol table scan, including - # variables used only in nested scopes, are included here. - self.freevars = [] - self.cellvars = [] - # The closure list is used to track the order of cell - # variables and free variables in the resulting code object. - # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both - # kinds of variables. 
- self.closure = [] - self.varnames = list(args) or [] - for i in range(len(self.varnames)): - var = self.varnames[i] - if isinstance(var, TupleArg): - self.varnames[i] = var.getName() - self.stage = RAW - - def setDocstring(self, doc): - self.docstring = doc - - def setFlag(self, flag): - self.flags = self.flags | flag - if flag == CO_VARARGS: - self.argcount = self.argcount - 1 - - def checkFlag(self, flag): - if self.flags & flag: - return 1 - - def setFreeVars(self, names): - self.freevars = list(names) - - def setCellVars(self, names): - self.cellvars = names - - def getCode(self): - """Get a Python code object""" - assert self.stage == RAW - self.computeStackDepth() - self.flattenGraph() - assert self.stage == FLAT - self.convertArgs() - assert self.stage == CONV - self.makeByteCode() - assert self.stage == DONE - return self.newCodeObject() - - def dump(self, io=None): - if io: - save = sys.stdout - sys.stdout = io - pc = 0 - for t in self.insts: - opname = t[0] - if opname == "SET_LINENO": - print - if len(t) == 1: - print "\t", "%3d" % pc, opname - pc = pc + 1 - else: - print "\t", "%3d" % pc, opname, t[1] - pc = pc + 3 - if io: - sys.stdout = save - - def computeStackDepth(self): - """Compute the max stack depth. - - Approach is to compute the stack effect of each basic block. - Then find the path through the code with the largest total - effect. 
- """ - depth = {} - exit = None - for b in self.getBlocks(): - depth[b] = findDepth(b.getInstructions()) - - seen = {} - - def max_depth(b, d): - if b in seen: - return d - seen[b] = 1 - d = d + depth[b] - children = b.get_children() - if children: - return max([max_depth(c, d) for c in children]) - else: - if not b.label == "exit": - return max_depth(self.exit, d) - else: - return d - - self.stacksize = max_depth(self.entry, 0) - - def flattenGraph(self): - """Arrange the blocks in order and resolve jumps""" - assert self.stage == RAW - self.insts = insts = [] - pc = 0 - begin = {} - end = {} - for b in self.getBlocksInOrder(): - begin[b] = pc - for inst in b.getInstructions(): - insts.append(inst) - if len(inst) == 1: - pc = pc + 1 - elif inst[0] != "SET_LINENO": - # arg takes 2 bytes - pc = pc + 3 - end[b] = pc - pc = 0 - for i in range(len(insts)): - inst = insts[i] - if len(inst) == 1: - pc = pc + 1 - elif inst[0] != "SET_LINENO": - pc = pc + 3 - opname = inst[0] - if opname in self.hasjrel: - oparg = inst[1] - offset = begin[oparg] - pc - insts[i] = opname, offset - elif opname in self.hasjabs: - insts[i] = opname, begin[inst[1]] - self.stage = FLAT - - hasjrel = set() - for i in dis.hasjrel: - hasjrel.add(dis.opname[i]) - hasjabs = set() - for i in dis.hasjabs: - hasjabs.add(dis.opname[i]) - - def convertArgs(self): - """Convert arguments from symbolic to concrete form""" - assert self.stage == FLAT - self.consts.insert(0, self.docstring) - self.sort_cellvars() - for i in range(len(self.insts)): - t = self.insts[i] - if len(t) == 2: - opname, oparg = t - conv = self._converters.get(opname, None) - if conv: - self.insts[i] = opname, conv(self, oparg) - self.stage = CONV - - def sort_cellvars(self): - """Sort cellvars in the order of varnames and prune from freevars. 
- """ - cells = {} - for name in self.cellvars: - cells[name] = 1 - self.cellvars = [name for name in self.varnames - if name in cells] - for name in self.cellvars: - del cells[name] - self.cellvars = self.cellvars + cells.keys() - self.closure = self.cellvars + self.freevars - - def _lookupName(self, name, list): - """Return index of name in list, appending if necessary - - This routine uses a list instead of a dictionary, because a - dictionary can't store two different keys if the keys have the - same value but different types, e.g. 2 and 2L. The compiler - must treat these two separately, so it does an explicit type - comparison before comparing the values. - """ - t = type(name) - for i in range(len(list)): - if t == type(list[i]) and list[i] == name: - return i - end = len(list) - list.append(name) - return end - - _converters = {} - def _convert_LOAD_CONST(self, arg): - if hasattr(arg, 'getCode'): - arg = arg.getCode() - return self._lookupName(arg, self.consts) - - def _convert_LOAD_FAST(self, arg): - self._lookupName(arg, self.names) - return self._lookupName(arg, self.varnames) - _convert_STORE_FAST = _convert_LOAD_FAST - _convert_DELETE_FAST = _convert_LOAD_FAST - - def _convert_LOAD_NAME(self, arg): - if self.klass is None: - self._lookupName(arg, self.varnames) - return self._lookupName(arg, self.names) - - def _convert_NAME(self, arg): - if self.klass is None: - self._lookupName(arg, self.varnames) - return self._lookupName(arg, self.names) - _convert_STORE_NAME = _convert_NAME - _convert_DELETE_NAME = _convert_NAME - _convert_IMPORT_NAME = _convert_NAME - _convert_IMPORT_FROM = _convert_NAME - _convert_STORE_ATTR = _convert_NAME - _convert_LOAD_ATTR = _convert_NAME - _convert_DELETE_ATTR = _convert_NAME - _convert_LOAD_GLOBAL = _convert_NAME - _convert_STORE_GLOBAL = _convert_NAME - _convert_DELETE_GLOBAL = _convert_NAME - - def _convert_DEREF(self, arg): - self._lookupName(arg, self.names) - self._lookupName(arg, self.varnames) - return 
self._lookupName(arg, self.closure) - _convert_LOAD_DEREF = _convert_DEREF - _convert_STORE_DEREF = _convert_DEREF - - def _convert_LOAD_CLOSURE(self, arg): - self._lookupName(arg, self.varnames) - return self._lookupName(arg, self.closure) - - _cmp = list(dis.cmp_op) - def _convert_COMPARE_OP(self, arg): - return self._cmp.index(arg) - - # similarly for other opcodes... - - for name, obj in locals().items(): - if name[:9] == "_convert_": - opname = name[9:] - _converters[opname] = obj - del name, obj, opname - - def makeByteCode(self): - assert self.stage == CONV - self.lnotab = lnotab = LineAddrTable() - for t in self.insts: - opname = t[0] - if len(t) == 1: - lnotab.addCode(self.opnum[opname]) - else: - oparg = t[1] - if opname == "SET_LINENO": - lnotab.nextLine(oparg) - continue - hi, lo = twobyte(oparg) - try: - lnotab.addCode(self.opnum[opname], lo, hi) - except ValueError: - print opname, oparg - print self.opnum[opname], lo, hi - raise - self.stage = DONE - - opnum = {} - for num in range(len(dis.opname)): - opnum[dis.opname[num]] = num - del num - - def newCodeObject(self): - assert self.stage == DONE - if (self.flags & CO_NEWLOCALS) == 0: - nlocals = 0 - else: - nlocals = len(self.varnames) - argcount = self.argcount - if self.flags & CO_VARKEYWORDS: - argcount = argcount - 1 - return types.CodeType(argcount, nlocals, self.stacksize, self.flags, - self.lnotab.getCode(), self.getConsts(), - tuple(self.names), tuple(self.varnames), - self.filename, self.name, self.lnotab.firstline, - self.lnotab.getTable(), tuple(self.freevars), - tuple(self.cellvars)) - - def getConsts(self): - """Return a tuple for the const slot of the code object - - Must convert references to code (MAKE_FUNCTION) to code - objects recursively. 
- """ - l = [] - for elt in self.consts: - if isinstance(elt, PyFlowGraph): - elt = elt.getCode() - l.append(elt) - return tuple(l) - -def isJump(opname): - if opname[:4] == 'JUMP': - return 1 - -class TupleArg: - """Helper for marking func defs with nested tuples in arglist""" - def __init__(self, count, names): - self.count = count - self.names = names - def __repr__(self): - return "TupleArg(%s, %s)" % (self.count, self.names) - def getName(self): - return ".%d" % self.count - -def getArgCount(args): - argcount = len(args) - if args: - for arg in args: - if isinstance(arg, TupleArg): - numNames = len(misc.flatten(arg.names)) - argcount = argcount - numNames - return argcount - -def twobyte(val): - """Convert an int argument into high and low bytes""" - assert isinstance(val, int) - return divmod(val, 256) - -class LineAddrTable: - """lnotab - - This class builds the lnotab, which is documented in compile.c. - Here's a brief recap: - - For each SET_LINENO instruction after the first one, two bytes are - added to lnotab. (In some cases, multiple two-byte entries are - added.) The first byte is the distance in bytes between the - instruction for the last SET_LINENO and the current SET_LINENO. - The second byte is offset in line numbers. If either offset is - greater than 255, multiple two-byte entries are added -- see - compile.c for the delicate details. - """ - - def __init__(self): - self.code = [] - self.codeOffset = 0 - self.firstline = 0 - self.lastline = 0 - self.lastoff = 0 - self.lnotab = [] - - def addCode(self, *args): - for arg in args: - self.code.append(chr(arg)) - self.codeOffset = self.codeOffset + len(args) - - def nextLine(self, lineno): - if self.firstline == 0: - self.firstline = lineno - self.lastline = lineno - else: - # compute deltas - addr = self.codeOffset - self.lastoff - line = lineno - self.lastline - # Python assumes that lineno always increases with - # increasing bytecode address (lnotab is unsigned char). 
- # Depending on when SET_LINENO instructions are emitted - # this is not always true. Consider the code: - # a = (1, - # b) - # In the bytecode stream, the assignment to "a" occurs - # after the loading of "b". This works with the C Python - # compiler because it only generates a SET_LINENO instruction - # for the assignment. - if line >= 0: - push = self.lnotab.append - while addr > 255: - push(255); push(0) - addr -= 255 - while line > 255: - push(addr); push(255) - line -= 255 - addr = 0 - if addr > 0 or line > 0: - push(addr); push(line) - self.lastline = lineno - self.lastoff = self.codeOffset - - def getCode(self): - return ''.join(self.code) - - def getTable(self): - return ''.join(map(chr, self.lnotab)) - -class StackDepthTracker: - # XXX 1. need to keep track of stack depth on jumps - # XXX 2. at least partly as a result, this code is broken - - def findDepth(self, insts, debug=0): - depth = 0 - maxDepth = 0 - for i in insts: - opname = i[0] - if debug: - print i, - delta = self.effect.get(opname, None) - if delta is not None: - depth = depth + delta - else: - # now check patterns - for pat, pat_delta in self.patterns: - if opname[:len(pat)] == pat: - delta = pat_delta - depth = depth + delta - break - # if we still haven't found a match - if delta is None: - meth = getattr(self, opname, None) - if meth is not None: - depth = depth + meth(i[1]) - if depth > maxDepth: - maxDepth = depth - if debug: - print depth, maxDepth - return maxDepth - - effect = { - 'POP_TOP': -1, - 'DUP_TOP': 1, - 'LIST_APPEND': -1, - 'SET_ADD': -1, - 'MAP_ADD': -2, - 'SLICE+1': -1, - 'SLICE+2': -1, - 'SLICE+3': -2, - 'STORE_SLICE+0': -1, - 'STORE_SLICE+1': -2, - 'STORE_SLICE+2': -2, - 'STORE_SLICE+3': -3, - 'DELETE_SLICE+0': -1, - 'DELETE_SLICE+1': -2, - 'DELETE_SLICE+2': -2, - 'DELETE_SLICE+3': -3, - 'STORE_SUBSCR': -3, - 'DELETE_SUBSCR': -2, - # PRINT_EXPR? 
- 'PRINT_ITEM': -1, - 'RETURN_VALUE': -1, - 'YIELD_VALUE': -1, - 'EXEC_STMT': -3, - 'BUILD_CLASS': -2, - 'STORE_NAME': -1, - 'STORE_ATTR': -2, - 'DELETE_ATTR': -1, - 'STORE_GLOBAL': -1, - 'BUILD_MAP': 1, - 'COMPARE_OP': -1, - 'STORE_FAST': -1, - 'IMPORT_STAR': -1, - 'IMPORT_NAME': -1, - 'IMPORT_FROM': 1, - 'LOAD_ATTR': 0, # unlike other loads - # close enough... - 'SETUP_EXCEPT': 3, - 'SETUP_FINALLY': 3, - 'FOR_ITER': 1, - 'WITH_CLEANUP': -1, - } - # use pattern match - patterns = [ - ('BINARY_', -1), - ('LOAD_', 1), - ] - - def UNPACK_SEQUENCE(self, count): - return count-1 - def BUILD_TUPLE(self, count): - return -count+1 - def BUILD_LIST(self, count): - return -count+1 - def BUILD_SET(self, count): - return -count+1 - def CALL_FUNCTION(self, argc): - hi, lo = divmod(argc, 256) - return -(lo + hi * 2) - def CALL_FUNCTION_VAR(self, argc): - return self.CALL_FUNCTION(argc)-1 - def CALL_FUNCTION_KW(self, argc): - return self.CALL_FUNCTION(argc)-1 - def CALL_FUNCTION_VAR_KW(self, argc): - return self.CALL_FUNCTION(argc)-2 - def MAKE_FUNCTION(self, argc): - return -argc - def MAKE_CLOSURE(self, argc): - # XXX need to account for free variables too! 
- return -argc - def BUILD_SLICE(self, argc): - if argc == 2: - return -1 - elif argc == 3: - return -2 - def DUP_TOPX(self, argc): - return argc - -findDepth = StackDepthTracker().findDepth diff --git a/python/Lib/compiler/pycodegen.py b/python/Lib/compiler/pycodegen.py deleted file mode 100755 index 6515945f39..0000000000 --- a/python/Lib/compiler/pycodegen.py +++ /dev/null @@ -1,1555 +0,0 @@ -import imp -import os -import marshal -import struct -import sys -from cStringIO import StringIO - -from compiler import ast, parse, walk, syntax -from compiler import pyassem, misc, future, symbols -from compiler.consts import SC_LOCAL, SC_GLOBAL_IMPLICIT, SC_GLOBAL_EXPLICIT, \ - SC_FREE, SC_CELL -from compiler.consts import (CO_VARARGS, CO_VARKEYWORDS, CO_NEWLOCALS, - CO_NESTED, CO_GENERATOR, CO_FUTURE_DIVISION, - CO_FUTURE_ABSIMPORT, CO_FUTURE_WITH_STATEMENT, CO_FUTURE_PRINT_FUNCTION) -from compiler.pyassem import TupleArg - -# XXX The version-specific code can go, since this code only works with 2.x. -# Do we have Python 1.x or Python 2.x? 
-try: - VERSION = sys.version_info[0] -except AttributeError: - VERSION = 1 - -callfunc_opcode_info = { - # (Have *args, Have **args) : opcode - (0,0) : "CALL_FUNCTION", - (1,0) : "CALL_FUNCTION_VAR", - (0,1) : "CALL_FUNCTION_KW", - (1,1) : "CALL_FUNCTION_VAR_KW", -} - -LOOP = 1 -EXCEPT = 2 -TRY_FINALLY = 3 -END_FINALLY = 4 - -def compileFile(filename, display=0): - f = open(filename, 'U') - buf = f.read() - f.close() - mod = Module(buf, filename) - try: - mod.compile(display) - except SyntaxError: - raise - else: - f = open(filename + "c", "wb") - mod.dump(f) - f.close() - -def compile(source, filename, mode, flags=None, dont_inherit=None): - """Replacement for builtin compile() function""" - if flags is not None or dont_inherit is not None: - raise RuntimeError, "not implemented yet" - - if mode == "single": - gen = Interactive(source, filename) - elif mode == "exec": - gen = Module(source, filename) - elif mode == "eval": - gen = Expression(source, filename) - else: - raise ValueError("compile() 3rd arg must be 'exec' or " - "'eval' or 'single'") - gen.compile() - return gen.code - -class AbstractCompileMode: - - mode = None # defined by subclass - - def __init__(self, source, filename): - self.source = source - self.filename = filename - self.code = None - - def _get_tree(self): - tree = parse(self.source, self.mode) - misc.set_filename(self.filename, tree) - syntax.check(tree) - return tree - - def compile(self): - pass # implemented by subclass - - def getCode(self): - return self.code - -class Expression(AbstractCompileMode): - - mode = "eval" - - def compile(self): - tree = self._get_tree() - gen = ExpressionCodeGenerator(tree) - self.code = gen.getCode() - -class Interactive(AbstractCompileMode): - - mode = "single" - - def compile(self): - tree = self._get_tree() - gen = InteractiveCodeGenerator(tree) - self.code = gen.getCode() - -class Module(AbstractCompileMode): - - mode = "exec" - - def compile(self, display=0): - tree = self._get_tree() - gen = 
ModuleCodeGenerator(tree) - if display: - import pprint - print pprint.pprint(tree) - self.code = gen.getCode() - - def dump(self, f): - f.write(self.getPycHeader()) - marshal.dump(self.code, f) - - MAGIC = imp.get_magic() - - def getPycHeader(self): - # compile.c uses marshal to write a long directly, with - # calling the interface that would also generate a 1-byte code - # to indicate the type of the value. simplest way to get the - # same effect is to call marshal and then skip the code. - mtime = os.path.getmtime(self.filename) - mtime = struct.pack(' 0: - top = top - 1 - kind, loop_block = self.setups[top] - if kind == LOOP: - break - if kind != LOOP: - raise SyntaxError, "'continue' outside loop (%s, %d)" % \ - (node.filename, node.lineno) - self.emit('CONTINUE_LOOP', loop_block) - self.nextBlock() - elif kind == END_FINALLY: - msg = "'continue' not allowed inside 'finally' clause (%s, %d)" - raise SyntaxError, msg % (node.filename, node.lineno) - - def visitTest(self, node, jump): - end = self.newBlock() - for child in node.nodes[:-1]: - self.visit(child) - self.emit(jump, end) - self.nextBlock() - self.visit(node.nodes[-1]) - self.nextBlock(end) - - def visitAnd(self, node): - self.visitTest(node, 'JUMP_IF_FALSE_OR_POP') - - def visitOr(self, node): - self.visitTest(node, 'JUMP_IF_TRUE_OR_POP') - - def visitIfExp(self, node): - endblock = self.newBlock() - elseblock = self.newBlock() - self.visit(node.test) - self.emit('POP_JUMP_IF_FALSE', elseblock) - self.visit(node.then) - self.emit('JUMP_FORWARD', endblock) - self.nextBlock(elseblock) - self.visit(node.else_) - self.nextBlock(endblock) - - def visitCompare(self, node): - self.visit(node.expr) - cleanup = self.newBlock() - for op, code in node.ops[:-1]: - self.visit(code) - self.emit('DUP_TOP') - self.emit('ROT_THREE') - self.emit('COMPARE_OP', op) - self.emit('JUMP_IF_FALSE_OR_POP', cleanup) - self.nextBlock() - # now do the last comparison - if node.ops: - op, code = node.ops[-1] - self.visit(code) - 
self.emit('COMPARE_OP', op) - if len(node.ops) > 1: - end = self.newBlock() - self.emit('JUMP_FORWARD', end) - self.startBlock(cleanup) - self.emit('ROT_TWO') - self.emit('POP_TOP') - self.nextBlock(end) - - # list comprehensions - def visitListComp(self, node): - self.set_lineno(node) - # setup list - self.emit('BUILD_LIST', 0) - - stack = [] - for i, for_ in zip(range(len(node.quals)), node.quals): - start, anchor = self.visit(for_) - cont = None - for if_ in for_.ifs: - if cont is None: - cont = self.newBlock() - self.visit(if_, cont) - stack.insert(0, (start, cont, anchor)) - - self.visit(node.expr) - self.emit('LIST_APPEND', len(node.quals) + 1) - - for start, cont, anchor in stack: - if cont: - self.nextBlock(cont) - self.emit('JUMP_ABSOLUTE', start) - self.startBlock(anchor) - - def visitSetComp(self, node): - self.set_lineno(node) - # setup list - self.emit('BUILD_SET', 0) - - stack = [] - for i, for_ in zip(range(len(node.quals)), node.quals): - start, anchor = self.visit(for_) - cont = None - for if_ in for_.ifs: - if cont is None: - cont = self.newBlock() - self.visit(if_, cont) - stack.insert(0, (start, cont, anchor)) - - self.visit(node.expr) - self.emit('SET_ADD', len(node.quals) + 1) - - for start, cont, anchor in stack: - if cont: - self.nextBlock(cont) - self.emit('JUMP_ABSOLUTE', start) - self.startBlock(anchor) - - def visitDictComp(self, node): - self.set_lineno(node) - # setup list - self.emit('BUILD_MAP', 0) - - stack = [] - for i, for_ in zip(range(len(node.quals)), node.quals): - start, anchor = self.visit(for_) - cont = None - for if_ in for_.ifs: - if cont is None: - cont = self.newBlock() - self.visit(if_, cont) - stack.insert(0, (start, cont, anchor)) - - self.visit(node.value) - self.visit(node.key) - self.emit('MAP_ADD', len(node.quals) + 1) - - for start, cont, anchor in stack: - if cont: - self.nextBlock(cont) - self.emit('JUMP_ABSOLUTE', start) - self.startBlock(anchor) - - def visitListCompFor(self, node): - start = self.newBlock() 
- anchor = self.newBlock() - - self.visit(node.list) - self.emit('GET_ITER') - self.nextBlock(start) - self.set_lineno(node, force=True) - self.emit('FOR_ITER', anchor) - self.nextBlock() - self.visit(node.assign) - return start, anchor - - def visitListCompIf(self, node, branch): - self.set_lineno(node, force=True) - self.visit(node.test) - self.emit('POP_JUMP_IF_FALSE', branch) - self.newBlock() - - def _makeClosure(self, gen, args): - frees = gen.scope.get_free_vars() - if frees: - for name in frees: - self.emit('LOAD_CLOSURE', name) - self.emit('BUILD_TUPLE', len(frees)) - self.emit('LOAD_CONST', gen) - self.emit('MAKE_CLOSURE', args) - else: - self.emit('LOAD_CONST', gen) - self.emit('MAKE_FUNCTION', args) - - def visitGenExpr(self, node): - gen = GenExprCodeGenerator(node, self.scopes, self.class_name, - self.get_module()) - walk(node.code, gen) - gen.finish() - self.set_lineno(node) - self._makeClosure(gen, 0) - # precomputation of outmost iterable - self.visit(node.code.quals[0].iter) - self.emit('GET_ITER') - self.emit('CALL_FUNCTION', 1) - - def visitGenExprInner(self, node): - self.set_lineno(node) - # setup list - - stack = [] - for i, for_ in zip(range(len(node.quals)), node.quals): - start, anchor, end = self.visit(for_) - cont = None - for if_ in for_.ifs: - if cont is None: - cont = self.newBlock() - self.visit(if_, cont) - stack.insert(0, (start, cont, anchor, end)) - - self.visit(node.expr) - self.emit('YIELD_VALUE') - self.emit('POP_TOP') - - for start, cont, anchor, end in stack: - if cont: - self.nextBlock(cont) - self.emit('JUMP_ABSOLUTE', start) - self.startBlock(anchor) - self.emit('POP_BLOCK') - self.setups.pop() - self.nextBlock(end) - - self.emit('LOAD_CONST', None) - - def visitGenExprFor(self, node): - start = self.newBlock() - anchor = self.newBlock() - end = self.newBlock() - - self.setups.push((LOOP, start)) - self.emit('SETUP_LOOP', end) - - if node.is_outmost: - self.loadName('.0') - else: - self.visit(node.iter) - 
self.emit('GET_ITER') - - self.nextBlock(start) - self.set_lineno(node, force=True) - self.emit('FOR_ITER', anchor) - self.nextBlock() - self.visit(node.assign) - return start, anchor, end - - def visitGenExprIf(self, node, branch): - self.set_lineno(node, force=True) - self.visit(node.test) - self.emit('POP_JUMP_IF_FALSE', branch) - self.newBlock() - - # exception related - - def visitAssert(self, node): - # XXX would be interesting to implement this via a - # transformation of the AST before this stage - if __debug__: - end = self.newBlock() - self.set_lineno(node) - # XXX AssertionError appears to be special case -- it is always - # loaded as a global even if there is a local name. I guess this - # is a sort of renaming op. - self.nextBlock() - self.visit(node.test) - self.emit('POP_JUMP_IF_TRUE', end) - self.nextBlock() - self.emit('LOAD_GLOBAL', 'AssertionError') - if node.fail: - self.visit(node.fail) - self.emit('RAISE_VARARGS', 2) - else: - self.emit('RAISE_VARARGS', 1) - self.nextBlock(end) - - def visitRaise(self, node): - self.set_lineno(node) - n = 0 - if node.expr1: - self.visit(node.expr1) - n = n + 1 - if node.expr2: - self.visit(node.expr2) - n = n + 1 - if node.expr3: - self.visit(node.expr3) - n = n + 1 - self.emit('RAISE_VARARGS', n) - - def visitTryExcept(self, node): - body = self.newBlock() - handlers = self.newBlock() - end = self.newBlock() - if node.else_: - lElse = self.newBlock() - else: - lElse = end - self.set_lineno(node) - self.emit('SETUP_EXCEPT', handlers) - self.nextBlock(body) - self.setups.push((EXCEPT, body)) - self.visit(node.body) - self.emit('POP_BLOCK') - self.setups.pop() - self.emit('JUMP_FORWARD', lElse) - self.startBlock(handlers) - - last = len(node.handlers) - 1 - for i in range(len(node.handlers)): - expr, target, body = node.handlers[i] - self.set_lineno(expr) - if expr: - self.emit('DUP_TOP') - self.visit(expr) - self.emit('COMPARE_OP', 'exception match') - next = self.newBlock() - self.emit('POP_JUMP_IF_FALSE', 
next) - self.nextBlock() - self.emit('POP_TOP') - if target: - self.visit(target) - else: - self.emit('POP_TOP') - self.emit('POP_TOP') - self.visit(body) - self.emit('JUMP_FORWARD', end) - if expr: - self.nextBlock(next) - else: - self.nextBlock() - self.emit('END_FINALLY') - if node.else_: - self.nextBlock(lElse) - self.visit(node.else_) - self.nextBlock(end) - - def visitTryFinally(self, node): - body = self.newBlock() - final = self.newBlock() - self.set_lineno(node) - self.emit('SETUP_FINALLY', final) - self.nextBlock(body) - self.setups.push((TRY_FINALLY, body)) - self.visit(node.body) - self.emit('POP_BLOCK') - self.setups.pop() - self.emit('LOAD_CONST', None) - self.nextBlock(final) - self.setups.push((END_FINALLY, final)) - self.visit(node.final) - self.emit('END_FINALLY') - self.setups.pop() - - __with_count = 0 - - def visitWith(self, node): - body = self.newBlock() - final = self.newBlock() - self.__with_count += 1 - valuevar = "_[%d]" % self.__with_count - self.set_lineno(node) - self.visit(node.expr) - self.emit('DUP_TOP') - self.emit('LOAD_ATTR', '__exit__') - self.emit('ROT_TWO') - self.emit('LOAD_ATTR', '__enter__') - self.emit('CALL_FUNCTION', 0) - if node.vars is None: - self.emit('POP_TOP') - else: - self._implicitNameOp('STORE', valuevar) - self.emit('SETUP_FINALLY', final) - self.nextBlock(body) - self.setups.push((TRY_FINALLY, body)) - if node.vars is not None: - self._implicitNameOp('LOAD', valuevar) - self._implicitNameOp('DELETE', valuevar) - self.visit(node.vars) - self.visit(node.body) - self.emit('POP_BLOCK') - self.setups.pop() - self.emit('LOAD_CONST', None) - self.nextBlock(final) - self.setups.push((END_FINALLY, final)) - self.emit('WITH_CLEANUP') - self.emit('END_FINALLY') - self.setups.pop() - self.__with_count -= 1 - - # misc - - def visitDiscard(self, node): - self.set_lineno(node) - self.visit(node.expr) - self.emit('POP_TOP') - - def visitConst(self, node): - self.emit('LOAD_CONST', node.value) - - def visitKeyword(self, 
node): - self.emit('LOAD_CONST', node.name) - self.visit(node.expr) - - def visitGlobal(self, node): - # no code to generate - pass - - def visitName(self, node): - self.set_lineno(node) - self.loadName(node.name) - - def visitPass(self, node): - self.set_lineno(node) - - def visitImport(self, node): - self.set_lineno(node) - level = 0 if self.graph.checkFlag(CO_FUTURE_ABSIMPORT) else -1 - for name, alias in node.names: - if VERSION > 1: - self.emit('LOAD_CONST', level) - self.emit('LOAD_CONST', None) - self.emit('IMPORT_NAME', name) - mod = name.split(".")[0] - if alias: - self._resolveDots(name) - self.storeName(alias) - else: - self.storeName(mod) - - def visitFrom(self, node): - self.set_lineno(node) - level = node.level - if level == 0 and not self.graph.checkFlag(CO_FUTURE_ABSIMPORT): - level = -1 - fromlist = tuple(name for (name, alias) in node.names) - if VERSION > 1: - self.emit('LOAD_CONST', level) - self.emit('LOAD_CONST', fromlist) - self.emit('IMPORT_NAME', node.modname) - for name, alias in node.names: - if VERSION > 1: - if name == '*': - self.namespace = 0 - self.emit('IMPORT_STAR') - # There can only be one name w/ from ... 
import * - assert len(node.names) == 1 - return - else: - self.emit('IMPORT_FROM', name) - self._resolveDots(name) - self.storeName(alias or name) - else: - self.emit('IMPORT_FROM', name) - self.emit('POP_TOP') - - def _resolveDots(self, name): - elts = name.split(".") - if len(elts) == 1: - return - for elt in elts[1:]: - self.emit('LOAD_ATTR', elt) - - def visitGetattr(self, node): - self.visit(node.expr) - self.emit('LOAD_ATTR', self.mangle(node.attrname)) - - # next five implement assignments - - def visitAssign(self, node): - self.set_lineno(node) - self.visit(node.expr) - dups = len(node.nodes) - 1 - for i in range(len(node.nodes)): - elt = node.nodes[i] - if i < dups: - self.emit('DUP_TOP') - if isinstance(elt, ast.Node): - self.visit(elt) - - def visitAssName(self, node): - if node.flags == 'OP_ASSIGN': - self.storeName(node.name) - elif node.flags == 'OP_DELETE': - self.set_lineno(node) - self.delName(node.name) - else: - print "oops", node.flags - - def visitAssAttr(self, node): - self.visit(node.expr) - if node.flags == 'OP_ASSIGN': - self.emit('STORE_ATTR', self.mangle(node.attrname)) - elif node.flags == 'OP_DELETE': - self.emit('DELETE_ATTR', self.mangle(node.attrname)) - else: - print "warning: unexpected flags:", node.flags - print node - - def _visitAssSequence(self, node, op='UNPACK_SEQUENCE'): - if findOp(node) != 'OP_DELETE': - self.emit(op, len(node.nodes)) - for child in node.nodes: - self.visit(child) - - if VERSION > 1: - visitAssTuple = _visitAssSequence - visitAssList = _visitAssSequence - else: - def visitAssTuple(self, node): - self._visitAssSequence(node, 'UNPACK_TUPLE') - - def visitAssList(self, node): - self._visitAssSequence(node, 'UNPACK_LIST') - - # augmented assignment - - def visitAugAssign(self, node): - self.set_lineno(node) - aug_node = wrap_aug(node.node) - self.visit(aug_node, "load") - self.visit(node.expr) - self.emit(self._augmented_opcode[node.op]) - self.visit(aug_node, "store") - - _augmented_opcode = { - '+=' : 
'INPLACE_ADD', - '-=' : 'INPLACE_SUBTRACT', - '*=' : 'INPLACE_MULTIPLY', - '/=' : 'INPLACE_DIVIDE', - '//=': 'INPLACE_FLOOR_DIVIDE', - '%=' : 'INPLACE_MODULO', - '**=': 'INPLACE_POWER', - '>>=': 'INPLACE_RSHIFT', - '<<=': 'INPLACE_LSHIFT', - '&=' : 'INPLACE_AND', - '^=' : 'INPLACE_XOR', - '|=' : 'INPLACE_OR', - } - - def visitAugName(self, node, mode): - if mode == "load": - self.loadName(node.name) - elif mode == "store": - self.storeName(node.name) - - def visitAugGetattr(self, node, mode): - if mode == "load": - self.visit(node.expr) - self.emit('DUP_TOP') - self.emit('LOAD_ATTR', self.mangle(node.attrname)) - elif mode == "store": - self.emit('ROT_TWO') - self.emit('STORE_ATTR', self.mangle(node.attrname)) - - def visitAugSlice(self, node, mode): - if mode == "load": - self.visitSlice(node, 1) - elif mode == "store": - slice = 0 - if node.lower: - slice = slice | 1 - if node.upper: - slice = slice | 2 - if slice == 0: - self.emit('ROT_TWO') - elif slice == 3: - self.emit('ROT_FOUR') - else: - self.emit('ROT_THREE') - self.emit('STORE_SLICE+%d' % slice) - - def visitAugSubscript(self, node, mode): - if mode == "load": - self.visitSubscript(node, 1) - elif mode == "store": - self.emit('ROT_THREE') - self.emit('STORE_SUBSCR') - - def visitExec(self, node): - self.visit(node.expr) - if node.locals is None: - self.emit('LOAD_CONST', None) - else: - self.visit(node.locals) - if node.globals is None: - self.emit('DUP_TOP') - else: - self.visit(node.globals) - self.emit('EXEC_STMT') - - def visitCallFunc(self, node): - pos = 0 - kw = 0 - self.set_lineno(node) - self.visit(node.node) - for arg in node.args: - self.visit(arg) - if isinstance(arg, ast.Keyword): - kw = kw + 1 - else: - pos = pos + 1 - if node.star_args is not None: - self.visit(node.star_args) - if node.dstar_args is not None: - self.visit(node.dstar_args) - have_star = node.star_args is not None - have_dstar = node.dstar_args is not None - opcode = callfunc_opcode_info[have_star, have_dstar] - 
self.emit(opcode, kw << 8 | pos) - - def visitPrint(self, node, newline=0): - self.set_lineno(node) - if node.dest: - self.visit(node.dest) - for child in node.nodes: - if node.dest: - self.emit('DUP_TOP') - self.visit(child) - if node.dest: - self.emit('ROT_TWO') - self.emit('PRINT_ITEM_TO') - else: - self.emit('PRINT_ITEM') - if node.dest and not newline: - self.emit('POP_TOP') - - def visitPrintnl(self, node): - self.visitPrint(node, newline=1) - if node.dest: - self.emit('PRINT_NEWLINE_TO') - else: - self.emit('PRINT_NEWLINE') - - def visitReturn(self, node): - self.set_lineno(node) - self.visit(node.value) - self.emit('RETURN_VALUE') - - def visitYield(self, node): - self.set_lineno(node) - self.visit(node.value) - self.emit('YIELD_VALUE') - - # slice and subscript stuff - - def visitSlice(self, node, aug_flag=None): - # aug_flag is used by visitAugSlice - self.visit(node.expr) - slice = 0 - if node.lower: - self.visit(node.lower) - slice = slice | 1 - if node.upper: - self.visit(node.upper) - slice = slice | 2 - if aug_flag: - if slice == 0: - self.emit('DUP_TOP') - elif slice == 3: - self.emit('DUP_TOPX', 3) - else: - self.emit('DUP_TOPX', 2) - if node.flags == 'OP_APPLY': - self.emit('SLICE+%d' % slice) - elif node.flags == 'OP_ASSIGN': - self.emit('STORE_SLICE+%d' % slice) - elif node.flags == 'OP_DELETE': - self.emit('DELETE_SLICE+%d' % slice) - else: - print "weird slice", node.flags - raise - - def visitSubscript(self, node, aug_flag=None): - self.visit(node.expr) - for sub in node.subs: - self.visit(sub) - if len(node.subs) > 1: - self.emit('BUILD_TUPLE', len(node.subs)) - if aug_flag: - self.emit('DUP_TOPX', 2) - if node.flags == 'OP_APPLY': - self.emit('BINARY_SUBSCR') - elif node.flags == 'OP_ASSIGN': - self.emit('STORE_SUBSCR') - elif node.flags == 'OP_DELETE': - self.emit('DELETE_SUBSCR') - - # binary ops - - def binaryOp(self, node, op): - self.visit(node.left) - self.visit(node.right) - self.emit(op) - - def visitAdd(self, node): - return 
self.binaryOp(node, 'BINARY_ADD') - - def visitSub(self, node): - return self.binaryOp(node, 'BINARY_SUBTRACT') - - def visitMul(self, node): - return self.binaryOp(node, 'BINARY_MULTIPLY') - - def visitDiv(self, node): - return self.binaryOp(node, self._div_op) - - def visitFloorDiv(self, node): - return self.binaryOp(node, 'BINARY_FLOOR_DIVIDE') - - def visitMod(self, node): - return self.binaryOp(node, 'BINARY_MODULO') - - def visitPower(self, node): - return self.binaryOp(node, 'BINARY_POWER') - - def visitLeftShift(self, node): - return self.binaryOp(node, 'BINARY_LSHIFT') - - def visitRightShift(self, node): - return self.binaryOp(node, 'BINARY_RSHIFT') - - # unary ops - - def unaryOp(self, node, op): - self.visit(node.expr) - self.emit(op) - - def visitInvert(self, node): - return self.unaryOp(node, 'UNARY_INVERT') - - def visitUnarySub(self, node): - return self.unaryOp(node, 'UNARY_NEGATIVE') - - def visitUnaryAdd(self, node): - return self.unaryOp(node, 'UNARY_POSITIVE') - - def visitUnaryInvert(self, node): - return self.unaryOp(node, 'UNARY_INVERT') - - def visitNot(self, node): - return self.unaryOp(node, 'UNARY_NOT') - - def visitBackquote(self, node): - return self.unaryOp(node, 'UNARY_CONVERT') - - # bit ops - - def bitOp(self, nodes, op): - self.visit(nodes[0]) - for node in nodes[1:]: - self.visit(node) - self.emit(op) - - def visitBitand(self, node): - return self.bitOp(node.nodes, 'BINARY_AND') - - def visitBitor(self, node): - return self.bitOp(node.nodes, 'BINARY_OR') - - def visitBitxor(self, node): - return self.bitOp(node.nodes, 'BINARY_XOR') - - # object constructors - - def visitEllipsis(self, node): - self.emit('LOAD_CONST', Ellipsis) - - def visitTuple(self, node): - self.set_lineno(node) - for elt in node.nodes: - self.visit(elt) - self.emit('BUILD_TUPLE', len(node.nodes)) - - def visitList(self, node): - self.set_lineno(node) - for elt in node.nodes: - self.visit(elt) - self.emit('BUILD_LIST', len(node.nodes)) - - def visitSet(self, 
node): - self.set_lineno(node) - for elt in node.nodes: - self.visit(elt) - self.emit('BUILD_SET', len(node.nodes)) - - def visitSliceobj(self, node): - for child in node.nodes: - self.visit(child) - self.emit('BUILD_SLICE', len(node.nodes)) - - def visitDict(self, node): - self.set_lineno(node) - self.emit('BUILD_MAP', 0) - for k, v in node.items: - self.emit('DUP_TOP') - self.visit(k) - self.visit(v) - self.emit('ROT_THREE') - self.emit('STORE_SUBSCR') - -class NestedScopeMixin: - """Defines initClass() for nested scoping (Python 2.2-compatible)""" - def initClass(self): - self.__class__.NameFinder = LocalNameFinder - self.__class__.FunctionGen = FunctionCodeGenerator - self.__class__.ClassGen = ClassCodeGenerator - -class ModuleCodeGenerator(NestedScopeMixin, CodeGenerator): - __super_init = CodeGenerator.__init__ - - scopes = None - - def __init__(self, tree): - self.graph = pyassem.PyFlowGraph("", tree.filename) - self.futures = future.find_futures(tree) - self.__super_init() - walk(tree, self) - - def get_module(self): - return self - -class ExpressionCodeGenerator(NestedScopeMixin, CodeGenerator): - __super_init = CodeGenerator.__init__ - - scopes = None - futures = () - - def __init__(self, tree): - self.graph = pyassem.PyFlowGraph("", tree.filename) - self.__super_init() - walk(tree, self) - - def get_module(self): - return self - -class InteractiveCodeGenerator(NestedScopeMixin, CodeGenerator): - - __super_init = CodeGenerator.__init__ - - scopes = None - futures = () - - def __init__(self, tree): - self.graph = pyassem.PyFlowGraph("", tree.filename) - self.__super_init() - self.set_lineno(tree) - walk(tree, self) - self.emit('RETURN_VALUE') - - def get_module(self): - return self - - def visitDiscard(self, node): - # XXX Discard means it's an expression. Perhaps this is a bad - # name. 
- self.visit(node.expr) - self.emit('PRINT_EXPR') - -class AbstractFunctionCode: - optimized = 1 - lambdaCount = 0 - - def __init__(self, func, scopes, isLambda, class_name, mod): - self.class_name = class_name - self.module = mod - if isLambda: - klass = FunctionCodeGenerator - name = "" % klass.lambdaCount - klass.lambdaCount = klass.lambdaCount + 1 - else: - name = func.name - - args, hasTupleArg = generateArgList(func.argnames) - self.graph = pyassem.PyFlowGraph(name, func.filename, args, - optimized=1) - self.isLambda = isLambda - self.super_init() - - if not isLambda and func.doc: - self.setDocstring(func.doc) - - lnf = walk(func.code, self.NameFinder(args), verbose=0) - self.locals.push(lnf.getLocals()) - if func.varargs: - self.graph.setFlag(CO_VARARGS) - if func.kwargs: - self.graph.setFlag(CO_VARKEYWORDS) - self.set_lineno(func) - if hasTupleArg: - self.generateArgUnpack(func.argnames) - - def get_module(self): - return self.module - - def finish(self): - self.graph.startExitBlock() - if not self.isLambda: - self.emit('LOAD_CONST', None) - self.emit('RETURN_VALUE') - - def generateArgUnpack(self, args): - for i in range(len(args)): - arg = args[i] - if isinstance(arg, tuple): - self.emit('LOAD_FAST', '.%d' % (i * 2)) - self.unpackSequence(arg) - - def unpackSequence(self, tup): - if VERSION > 1: - self.emit('UNPACK_SEQUENCE', len(tup)) - else: - self.emit('UNPACK_TUPLE', len(tup)) - for elt in tup: - if isinstance(elt, tuple): - self.unpackSequence(elt) - else: - self._nameOp('STORE', elt) - - unpackTuple = unpackSequence - -class FunctionCodeGenerator(NestedScopeMixin, AbstractFunctionCode, - CodeGenerator): - super_init = CodeGenerator.__init__ # call be other init - scopes = None - - __super_init = AbstractFunctionCode.__init__ - - def __init__(self, func, scopes, isLambda, class_name, mod): - self.scopes = scopes - self.scope = scopes[func] - self.__super_init(func, scopes, isLambda, class_name, mod) - 
self.graph.setFreeVars(self.scope.get_free_vars()) - self.graph.setCellVars(self.scope.get_cell_vars()) - if self.scope.generator is not None: - self.graph.setFlag(CO_GENERATOR) - -class GenExprCodeGenerator(NestedScopeMixin, AbstractFunctionCode, - CodeGenerator): - super_init = CodeGenerator.__init__ # call be other init - scopes = None - - __super_init = AbstractFunctionCode.__init__ - - def __init__(self, gexp, scopes, class_name, mod): - self.scopes = scopes - self.scope = scopes[gexp] - self.__super_init(gexp, scopes, 1, class_name, mod) - self.graph.setFreeVars(self.scope.get_free_vars()) - self.graph.setCellVars(self.scope.get_cell_vars()) - self.graph.setFlag(CO_GENERATOR) - -class AbstractClassCode: - - def __init__(self, klass, scopes, module): - self.class_name = klass.name - self.module = module - self.graph = pyassem.PyFlowGraph(klass.name, klass.filename, - optimized=0, klass=1) - self.super_init() - lnf = walk(klass.code, self.NameFinder(), verbose=0) - self.locals.push(lnf.getLocals()) - self.graph.setFlag(CO_NEWLOCALS) - if klass.doc: - self.setDocstring(klass.doc) - - def get_module(self): - return self.module - - def finish(self): - self.graph.startExitBlock() - self.emit('LOAD_LOCALS') - self.emit('RETURN_VALUE') - -class ClassCodeGenerator(NestedScopeMixin, AbstractClassCode, CodeGenerator): - super_init = CodeGenerator.__init__ - scopes = None - - __super_init = AbstractClassCode.__init__ - - def __init__(self, klass, scopes, module): - self.scopes = scopes - self.scope = scopes[klass] - self.__super_init(klass, scopes, module) - self.graph.setFreeVars(self.scope.get_free_vars()) - self.graph.setCellVars(self.scope.get_cell_vars()) - self.set_lineno(klass) - self.emit("LOAD_GLOBAL", "__name__") - self.storeName("__module__") - if klass.doc: - self.emit("LOAD_CONST", klass.doc) - self.storeName('__doc__') - -def generateArgList(arglist): - """Generate an arg list marking TupleArgs""" - args = [] - extra = [] - count = 0 - for i in 
range(len(arglist)): - elt = arglist[i] - if isinstance(elt, str): - args.append(elt) - elif isinstance(elt, tuple): - args.append(TupleArg(i * 2, elt)) - extra.extend(misc.flatten(elt)) - count = count + 1 - else: - raise ValueError, "unexpect argument type:", elt - return args + extra, count - -def findOp(node): - """Find the op (DELETE, LOAD, STORE) in an AssTuple tree""" - v = OpFinder() - walk(node, v, verbose=0) - return v.op - -class OpFinder: - def __init__(self): - self.op = None - def visitAssName(self, node): - if self.op is None: - self.op = node.flags - elif self.op != node.flags: - raise ValueError, "mixed ops in stmt" - visitAssAttr = visitAssName - visitSubscript = visitAssName - -class Delegator: - """Base class to support delegation for augmented assignment nodes - - To generator code for augmented assignments, we use the following - wrapper classes. In visitAugAssign, the left-hand expression node - is visited twice. The first time the visit uses the normal method - for that node . The second time the visit uses a different method - that generates the appropriate code to perform the assignment. - These delegator classes wrap the original AST nodes in order to - support the variant visit methods. 
- """ - def __init__(self, obj): - self.obj = obj - - def __getattr__(self, attr): - return getattr(self.obj, attr) - -class AugGetattr(Delegator): - pass - -class AugName(Delegator): - pass - -class AugSlice(Delegator): - pass - -class AugSubscript(Delegator): - pass - -wrapper = { - ast.Getattr: AugGetattr, - ast.Name: AugName, - ast.Slice: AugSlice, - ast.Subscript: AugSubscript, - } - -def wrap_aug(node): - return wrapper[node.__class__](node) - -if __name__ == "__main__": - for file in sys.argv[1:]: - compileFile(file) diff --git a/python/Lib/compiler/symbols.py b/python/Lib/compiler/symbols.py deleted file mode 100755 index afeec50153..0000000000 --- a/python/Lib/compiler/symbols.py +++ /dev/null @@ -1,462 +0,0 @@ -"""Module symbol-table generator""" - -from compiler import ast -from compiler.consts import SC_LOCAL, SC_GLOBAL_IMPLICIT, SC_GLOBAL_EXPLICIT, \ - SC_FREE, SC_CELL, SC_UNKNOWN -from compiler.misc import mangle -import types - - -import sys - -MANGLE_LEN = 256 - -class Scope: - # XXX how much information do I need about each name? - def __init__(self, name, module, klass=None): - self.name = name - self.module = module - self.defs = {} - self.uses = {} - self.globals = {} - self.params = {} - self.frees = {} - self.cells = {} - self.children = [] - # nested is true if the class could contain free variables, - # i.e. if it is nested within another function. 
- self.nested = None - self.generator = None - self.klass = None - if klass is not None: - for i in range(len(klass)): - if klass[i] != '_': - self.klass = klass[i:] - break - - def __repr__(self): - return "<%s: %s>" % (self.__class__.__name__, self.name) - - def mangle(self, name): - if self.klass is None: - return name - return mangle(name, self.klass) - - def add_def(self, name): - self.defs[self.mangle(name)] = 1 - - def add_use(self, name): - self.uses[self.mangle(name)] = 1 - - def add_global(self, name): - name = self.mangle(name) - if name in self.uses or name in self.defs: - pass # XXX warn about global following def/use - if name in self.params: - raise SyntaxError, "%s in %s is global and parameter" % \ - (name, self.name) - self.globals[name] = 1 - self.module.add_def(name) - - def add_param(self, name): - name = self.mangle(name) - self.defs[name] = 1 - self.params[name] = 1 - - def get_names(self): - d = {} - d.update(self.defs) - d.update(self.uses) - d.update(self.globals) - return d.keys() - - def add_child(self, child): - self.children.append(child) - - def get_children(self): - return self.children - - def DEBUG(self): - print >> sys.stderr, self.name, self.nested and "nested" or "" - print >> sys.stderr, "\tglobals: ", self.globals - print >> sys.stderr, "\tcells: ", self.cells - print >> sys.stderr, "\tdefs: ", self.defs - print >> sys.stderr, "\tuses: ", self.uses - print >> sys.stderr, "\tfrees:", self.frees - - def check_name(self, name): - """Return scope of name. - - The scope of a name could be LOCAL, GLOBAL, FREE, or CELL. 
- """ - if name in self.globals: - return SC_GLOBAL_EXPLICIT - if name in self.cells: - return SC_CELL - if name in self.defs: - return SC_LOCAL - if self.nested and (name in self.frees or name in self.uses): - return SC_FREE - if self.nested: - return SC_UNKNOWN - else: - return SC_GLOBAL_IMPLICIT - - def get_free_vars(self): - if not self.nested: - return () - free = {} - free.update(self.frees) - for name in self.uses.keys(): - if name not in self.defs and name not in self.globals: - free[name] = 1 - return free.keys() - - def handle_children(self): - for child in self.children: - frees = child.get_free_vars() - globals = self.add_frees(frees) - for name in globals: - child.force_global(name) - - def force_global(self, name): - """Force name to be global in scope. - - Some child of the current node had a free reference to name. - When the child was processed, it was labelled a free - variable. Now that all its enclosing scope have been - processed, the name is known to be a global or builtin. So - walk back down the child chain and set the name to be global - rather than free. - - Be careful to stop if a child does not think the name is - free. - """ - self.globals[name] = 1 - if name in self.frees: - del self.frees[name] - for child in self.children: - if child.check_name(name) == SC_FREE: - child.force_global(name) - - def add_frees(self, names): - """Process list of free vars from nested scope. - - Returns a list of names that are either 1) declared global in the - parent or 2) undefined in a top-level parent. In either case, - the nested scope should treat them as globals. 
- """ - child_globals = [] - for name in names: - sc = self.check_name(name) - if self.nested: - if sc == SC_UNKNOWN or sc == SC_FREE \ - or isinstance(self, ClassScope): - self.frees[name] = 1 - elif sc == SC_GLOBAL_IMPLICIT: - child_globals.append(name) - elif isinstance(self, FunctionScope) and sc == SC_LOCAL: - self.cells[name] = 1 - elif sc != SC_CELL: - child_globals.append(name) - else: - if sc == SC_LOCAL: - self.cells[name] = 1 - elif sc != SC_CELL: - child_globals.append(name) - return child_globals - - def get_cell_vars(self): - return self.cells.keys() - -class ModuleScope(Scope): - __super_init = Scope.__init__ - - def __init__(self): - self.__super_init("global", self) - -class FunctionScope(Scope): - pass - -class GenExprScope(Scope): - __super_init = Scope.__init__ - - __counter = 1 - - def __init__(self, module, klass=None): - i = self.__counter - self.__counter += 1 - self.__super_init("generator expression<%d>"%i, module, klass) - self.add_param('.0') - - def get_names(self): - keys = Scope.get_names(self) - return keys - -class LambdaScope(FunctionScope): - __super_init = Scope.__init__ - - __counter = 1 - - def __init__(self, module, klass=None): - i = self.__counter - self.__counter += 1 - self.__super_init("lambda.%d" % i, module, klass) - -class ClassScope(Scope): - __super_init = Scope.__init__ - - def __init__(self, name, module): - self.__super_init(name, module, name) - -class SymbolVisitor: - def __init__(self): - self.scopes = {} - self.klass = None - - # node that define new scopes - - def visitModule(self, node): - scope = self.module = self.scopes[node] = ModuleScope() - self.visit(node.node, scope) - - visitExpression = visitModule - - def visitFunction(self, node, parent): - if node.decorators: - self.visit(node.decorators, parent) - parent.add_def(node.name) - for n in node.defaults: - self.visit(n, parent) - scope = FunctionScope(node.name, self.module, self.klass) - if parent.nested or isinstance(parent, FunctionScope): - 
scope.nested = 1 - self.scopes[node] = scope - self._do_args(scope, node.argnames) - self.visit(node.code, scope) - self.handle_free_vars(scope, parent) - - def visitGenExpr(self, node, parent): - scope = GenExprScope(self.module, self.klass); - if parent.nested or isinstance(parent, FunctionScope) \ - or isinstance(parent, GenExprScope): - scope.nested = 1 - - self.scopes[node] = scope - self.visit(node.code, scope) - - self.handle_free_vars(scope, parent) - - def visitGenExprInner(self, node, scope): - for genfor in node.quals: - self.visit(genfor, scope) - - self.visit(node.expr, scope) - - def visitGenExprFor(self, node, scope): - self.visit(node.assign, scope, 1) - self.visit(node.iter, scope) - for if_ in node.ifs: - self.visit(if_, scope) - - def visitGenExprIf(self, node, scope): - self.visit(node.test, scope) - - def visitLambda(self, node, parent, assign=0): - # Lambda is an expression, so it could appear in an expression - # context where assign is passed. The transformer should catch - # any code that has a lambda on the left-hand side. 
- assert not assign - - for n in node.defaults: - self.visit(n, parent) - scope = LambdaScope(self.module, self.klass) - if parent.nested or isinstance(parent, FunctionScope): - scope.nested = 1 - self.scopes[node] = scope - self._do_args(scope, node.argnames) - self.visit(node.code, scope) - self.handle_free_vars(scope, parent) - - def _do_args(self, scope, args): - for name in args: - if type(name) == types.TupleType: - self._do_args(scope, name) - else: - scope.add_param(name) - - def handle_free_vars(self, scope, parent): - parent.add_child(scope) - scope.handle_children() - - def visitClass(self, node, parent): - parent.add_def(node.name) - for n in node.bases: - self.visit(n, parent) - scope = ClassScope(node.name, self.module) - if parent.nested or isinstance(parent, FunctionScope): - scope.nested = 1 - if node.doc is not None: - scope.add_def('__doc__') - scope.add_def('__module__') - self.scopes[node] = scope - prev = self.klass - self.klass = node.name - self.visit(node.code, scope) - self.klass = prev - self.handle_free_vars(scope, parent) - - # name can be a def or a use - - # XXX a few calls and nodes expect a third "assign" arg that is - # true if the name is being used as an assignment. only - # expressions contained within statements may have the assign arg. 
- - def visitName(self, node, scope, assign=0): - if assign: - scope.add_def(node.name) - else: - scope.add_use(node.name) - - # operations that bind new names - - def visitFor(self, node, scope): - self.visit(node.assign, scope, 1) - self.visit(node.list, scope) - self.visit(node.body, scope) - if node.else_: - self.visit(node.else_, scope) - - def visitFrom(self, node, scope): - for name, asname in node.names: - if name == "*": - continue - scope.add_def(asname or name) - - def visitImport(self, node, scope): - for name, asname in node.names: - i = name.find(".") - if i > -1: - name = name[:i] - scope.add_def(asname or name) - - def visitGlobal(self, node, scope): - for name in node.names: - scope.add_global(name) - - def visitAssign(self, node, scope): - """Propagate assignment flag down to child nodes. - - The Assign node doesn't itself contains the variables being - assigned to. Instead, the children in node.nodes are visited - with the assign flag set to true. When the names occur in - those nodes, they are marked as defs. - - Some names that occur in an assignment target are not bound by - the assignment, e.g. a name occurring inside a slice. The - visitor handles these nodes specially; they do not propagate - the assign flag to their children. - """ - for n in node.nodes: - self.visit(n, scope, 1) - self.visit(node.expr, scope) - - def visitAssName(self, node, scope, assign=1): - scope.add_def(node.name) - - def visitAssAttr(self, node, scope, assign=0): - self.visit(node.expr, scope, 0) - - def visitSubscript(self, node, scope, assign=0): - self.visit(node.expr, scope, 0) - for n in node.subs: - self.visit(n, scope, 0) - - def visitSlice(self, node, scope, assign=0): - self.visit(node.expr, scope, 0) - if node.lower: - self.visit(node.lower, scope, 0) - if node.upper: - self.visit(node.upper, scope, 0) - - def visitAugAssign(self, node, scope): - # If the LHS is a name, then this counts as assignment. - # Otherwise, it's just use. 
- self.visit(node.node, scope) - if isinstance(node.node, ast.Name): - self.visit(node.node, scope, 1) # XXX worry about this - self.visit(node.expr, scope) - - # prune if statements if tests are false - - _const_types = types.StringType, types.IntType, types.FloatType - - def visitIf(self, node, scope): - for test, body in node.tests: - if isinstance(test, ast.Const): - if type(test.value) in self._const_types: - if not test.value: - continue - self.visit(test, scope) - self.visit(body, scope) - if node.else_: - self.visit(node.else_, scope) - - # a yield statement signals a generator - - def visitYield(self, node, scope): - scope.generator = 1 - self.visit(node.value, scope) - -def list_eq(l1, l2): - return sorted(l1) == sorted(l2) - -if __name__ == "__main__": - import sys - from compiler import parseFile, walk - import symtable - - def get_names(syms): - return [s for s in [s.get_name() for s in syms.get_symbols()] - if not (s.startswith('_[') or s.startswith('.'))] - - for file in sys.argv[1:]: - print file - f = open(file) - buf = f.read() - f.close() - syms = symtable.symtable(buf, file, "exec") - mod_names = get_names(syms) - tree = parseFile(file) - s = SymbolVisitor() - walk(tree, s) - - # compare module-level symbols - names2 = s.scopes[tree].get_names() - - if not list_eq(mod_names, names2): - print - print "oops", file - print sorted(mod_names) - print sorted(names2) - sys.exit(-1) - - d = {} - d.update(s.scopes) - del d[tree] - scopes = d.values() - del d - - for s in syms.get_symbols(): - if s.is_namespace(): - l = [sc for sc in scopes - if sc.name == s.get_name()] - if len(l) > 1: - print "skipping", s.get_name() - else: - if not list_eq(get_names(s.get_namespace()), - l[0].get_names()): - print s.get_name() - print sorted(get_names(s.get_namespace())) - print sorted(l[0].get_names()) - sys.exit(-1) diff --git a/python/Lib/compiler/syntax.py b/python/Lib/compiler/syntax.py deleted file mode 100755 index a45d9c2cf6..0000000000 --- 
a/python/Lib/compiler/syntax.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Check for errs in the AST. - -The Python parser does not catch all syntax errors. Others, like -assignments with invalid targets, are caught in the code generation -phase. - -The compiler package catches some errors in the transformer module. -But it seems clearer to write checkers that use the AST to detect -errors. -""" - -from compiler import ast, walk - -def check(tree, multi=None): - v = SyntaxErrorChecker(multi) - walk(tree, v) - return v.errors - -class SyntaxErrorChecker: - """A visitor to find syntax errors in the AST.""" - - def __init__(self, multi=None): - """Create new visitor object. - - If optional argument multi is not None, then print messages - for each error rather than raising a SyntaxError for the - first. - """ - self.multi = multi - self.errors = 0 - - def error(self, node, msg): - self.errors = self.errors + 1 - if self.multi is not None: - print "%s:%s: %s" % (node.filename, node.lineno, msg) - else: - raise SyntaxError, "%s (%s:%s)" % (msg, node.filename, node.lineno) - - def visitAssign(self, node): - # the transformer module handles many of these - pass -## for target in node.nodes: -## if isinstance(target, ast.AssList): -## if target.lineno is None: -## target.lineno = node.lineno -## self.error(target, "can't assign to list comprehension") diff --git a/python/Lib/compiler/transformer.py b/python/Lib/compiler/transformer.py deleted file mode 100755 index d4f4613f48..0000000000 --- a/python/Lib/compiler/transformer.py +++ /dev/null @@ -1,1535 +0,0 @@ -"""Parse tree transformation module. - -Transforms Python source code into an abstract syntax tree (AST) -defined in the ast module. - -The simplest ways to invoke this module are via parse and parseFile. -parse(buf) -> AST -parseFile(path) -> AST -""" - -# Original version written by Greg Stein (gstein@lyra.org) -# and Bill Tutt (rassilon@lima.mudlib.org) -# February 1997. 
-# -# Modifications and improvements for Python 2.0 by Jeremy Hylton and -# Mark Hammond -# -# Some fixes to try to have correct line number on almost all nodes -# (except Module, Discard and Stmt) added by Sylvain Thenault -# -# Portions of this file are: -# Copyright (C) 1997-1998 Greg Stein. All Rights Reserved. -# -# This module is provided under a BSD-ish license. See -# http://www.opensource.org/licenses/bsd-license.html -# and replace OWNER, ORGANIZATION, and YEAR as appropriate. - -from compiler.ast import * -import parser -import symbol -import token - -class WalkerError(StandardError): - pass - -from compiler.consts import CO_VARARGS, CO_VARKEYWORDS -from compiler.consts import OP_ASSIGN, OP_DELETE, OP_APPLY - -def parseFile(path): - f = open(path, "U") - # XXX The parser API tolerates files without a trailing newline, - # but not strings without a trailing newline. Always add an extra - # newline to the file contents, since we're going through the string - # version of the API. 
- src = f.read() + "\n" - f.close() - return parse(src) - -def parse(buf, mode="exec"): - if mode == "exec" or mode == "single": - return Transformer().parsesuite(buf) - elif mode == "eval": - return Transformer().parseexpr(buf) - else: - raise ValueError("compile() arg 3 must be" - " 'exec' or 'eval' or 'single'") - -def asList(nodes): - l = [] - for item in nodes: - if hasattr(item, "asList"): - l.append(item.asList()) - else: - if type(item) is type( (None, None) ): - l.append(tuple(asList(item))) - elif type(item) is type( [] ): - l.append(asList(item)) - else: - l.append(item) - return l - -def extractLineNo(ast): - if not isinstance(ast[1], tuple): - # get a terminal node - return ast[2] - for child in ast[1:]: - if isinstance(child, tuple): - lineno = extractLineNo(child) - if lineno is not None: - return lineno - -def Node(*args): - kind = args[0] - if kind in nodes: - try: - return nodes[kind](*args[1:]) - except TypeError: - print nodes[kind], len(args), args - raise - else: - raise WalkerError, "Can't find appropriate Node type: %s" % str(args) - #return apply(ast.Node, args) - -class Transformer: - """Utility object for transforming Python parse trees. 
- - Exposes the following methods: - tree = transform(ast_tree) - tree = parsesuite(text) - tree = parseexpr(text) - tree = parsefile(fileob | filename) - """ - - def __init__(self): - self._dispatch = {} - for value, name in symbol.sym_name.items(): - if hasattr(self, name): - self._dispatch[value] = getattr(self, name) - self._dispatch[token.NEWLINE] = self.com_NEWLINE - self._atom_dispatch = {token.LPAR: self.atom_lpar, - token.LSQB: self.atom_lsqb, - token.LBRACE: self.atom_lbrace, - token.BACKQUOTE: self.atom_backquote, - token.NUMBER: self.atom_number, - token.STRING: self.atom_string, - token.NAME: self.atom_name, - } - self.encoding = None - - def transform(self, tree): - """Transform an AST into a modified parse tree.""" - if not (isinstance(tree, tuple) or isinstance(tree, list)): - tree = parser.st2tuple(tree, line_info=1) - return self.compile_node(tree) - - def parsesuite(self, text): - """Return a modified parse tree for the given suite text.""" - return self.transform(parser.suite(text)) - - def parseexpr(self, text): - """Return a modified parse tree for the given expression text.""" - return self.transform(parser.expr(text)) - - def parsefile(self, file): - """Return a modified parse tree for the contents of the given file.""" - if type(file) == type(''): - file = open(file) - return self.parsesuite(file.read()) - - # -------------------------------------------------------------- - # - # PRIVATE METHODS - # - - def compile_node(self, node): - ### emit a line-number node? 
- n = node[0] - - if n == symbol.encoding_decl: - self.encoding = node[2] - node = node[1] - n = node[0] - - if n == symbol.single_input: - return self.single_input(node[1:]) - if n == symbol.file_input: - return self.file_input(node[1:]) - if n == symbol.eval_input: - return self.eval_input(node[1:]) - if n == symbol.lambdef: - return self.lambdef(node[1:]) - if n == symbol.funcdef: - return self.funcdef(node[1:]) - if n == symbol.classdef: - return self.classdef(node[1:]) - - raise WalkerError, ('unexpected node type', n) - - def single_input(self, node): - ### do we want to do anything about being "interactive" ? - - # NEWLINE | simple_stmt | compound_stmt NEWLINE - n = node[0][0] - if n != token.NEWLINE: - return self.com_stmt(node[0]) - - return Pass() - - def file_input(self, nodelist): - doc = self.get_docstring(nodelist, symbol.file_input) - if doc is not None: - i = 1 - else: - i = 0 - stmts = [] - for node in nodelist[i:]: - if node[0] != token.ENDMARKER and node[0] != token.NEWLINE: - self.com_append_stmt(stmts, node) - return Module(doc, Stmt(stmts)) - - def eval_input(self, nodelist): - # from the built-in function input() - ### is this sufficient? 
- return Expression(self.com_node(nodelist[0])) - - def decorator_name(self, nodelist): - listlen = len(nodelist) - assert listlen >= 1 and listlen % 2 == 1 - - item = self.atom_name(nodelist) - i = 1 - while i < listlen: - assert nodelist[i][0] == token.DOT - assert nodelist[i + 1][0] == token.NAME - item = Getattr(item, nodelist[i + 1][1]) - i += 2 - - return item - - def decorator(self, nodelist): - # '@' dotted_name [ '(' [arglist] ')' ] - assert len(nodelist) in (3, 5, 6) - assert nodelist[0][0] == token.AT - assert nodelist[-1][0] == token.NEWLINE - - assert nodelist[1][0] == symbol.dotted_name - funcname = self.decorator_name(nodelist[1][1:]) - - if len(nodelist) > 3: - assert nodelist[2][0] == token.LPAR - expr = self.com_call_function(funcname, nodelist[3]) - else: - expr = funcname - - return expr - - def decorators(self, nodelist): - # decorators: decorator ([NEWLINE] decorator)* NEWLINE - items = [] - for dec_nodelist in nodelist: - assert dec_nodelist[0] == symbol.decorator - items.append(self.decorator(dec_nodelist[1:])) - return Decorators(items) - - def decorated(self, nodelist): - assert nodelist[0][0] == symbol.decorators - if nodelist[1][0] == symbol.funcdef: - n = [nodelist[0]] + list(nodelist[1][1:]) - return self.funcdef(n) - elif nodelist[1][0] == symbol.classdef: - decorators = self.decorators(nodelist[0][1:]) - cls = self.classdef(nodelist[1][1:]) - cls.decorators = decorators - return cls - raise WalkerError() - - def funcdef(self, nodelist): - # -6 -5 -4 -3 -2 -1 - # funcdef: [decorators] 'def' NAME parameters ':' suite - # parameters: '(' [varargslist] ')' - - if len(nodelist) == 6: - assert nodelist[0][0] == symbol.decorators - decorators = self.decorators(nodelist[0][1:]) - else: - assert len(nodelist) == 5 - decorators = None - - lineno = nodelist[-4][2] - name = nodelist[-4][1] - args = nodelist[-3][2] - - if args[0] == symbol.varargslist: - names, defaults, flags = self.com_arglist(args[1:]) - else: - names = defaults = () - flags = 
0 - doc = self.get_docstring(nodelist[-1]) - - # code for function - code = self.com_node(nodelist[-1]) - - if doc is not None: - assert isinstance(code, Stmt) - assert isinstance(code.nodes[0], Discard) - del code.nodes[0] - return Function(decorators, name, names, defaults, flags, doc, code, - lineno=lineno) - - def lambdef(self, nodelist): - # lambdef: 'lambda' [varargslist] ':' test - if nodelist[2][0] == symbol.varargslist: - names, defaults, flags = self.com_arglist(nodelist[2][1:]) - else: - names = defaults = () - flags = 0 - - # code for lambda - code = self.com_node(nodelist[-1]) - - return Lambda(names, defaults, flags, code, lineno=nodelist[1][2]) - old_lambdef = lambdef - - def classdef(self, nodelist): - # classdef: 'class' NAME ['(' [testlist] ')'] ':' suite - - name = nodelist[1][1] - doc = self.get_docstring(nodelist[-1]) - if nodelist[2][0] == token.COLON: - bases = [] - elif nodelist[3][0] == token.RPAR: - bases = [] - else: - bases = self.com_bases(nodelist[3]) - - # code for class - code = self.com_node(nodelist[-1]) - - if doc is not None: - assert isinstance(code, Stmt) - assert isinstance(code.nodes[0], Discard) - del code.nodes[0] - - return Class(name, bases, doc, code, lineno=nodelist[1][2]) - - def stmt(self, nodelist): - return self.com_stmt(nodelist[0]) - - small_stmt = stmt - flow_stmt = stmt - compound_stmt = stmt - - def simple_stmt(self, nodelist): - # small_stmt (';' small_stmt)* [';'] NEWLINE - stmts = [] - for i in range(0, len(nodelist), 2): - self.com_append_stmt(stmts, nodelist[i]) - return Stmt(stmts) - - def parameters(self, nodelist): - raise WalkerError - - def varargslist(self, nodelist): - raise WalkerError - - def fpdef(self, nodelist): - raise WalkerError - - def fplist(self, nodelist): - raise WalkerError - - def dotted_name(self, nodelist): - raise WalkerError - - def comp_op(self, nodelist): - raise WalkerError - - def trailer(self, nodelist): - raise WalkerError - - def sliceop(self, nodelist): - raise WalkerError 
- - def argument(self, nodelist): - raise WalkerError - - # -------------------------------------------------------------- - # - # STATEMENT NODES (invoked by com_node()) - # - - def expr_stmt(self, nodelist): - # augassign testlist | testlist ('=' testlist)* - en = nodelist[-1] - exprNode = self.lookup_node(en)(en[1:]) - if len(nodelist) == 1: - return Discard(exprNode, lineno=exprNode.lineno) - if nodelist[1][0] == token.EQUAL: - nodesl = [] - for i in range(0, len(nodelist) - 2, 2): - nodesl.append(self.com_assign(nodelist[i], OP_ASSIGN)) - return Assign(nodesl, exprNode, lineno=nodelist[1][2]) - else: - lval = self.com_augassign(nodelist[0]) - op = self.com_augassign_op(nodelist[1]) - return AugAssign(lval, op[1], exprNode, lineno=op[2]) - raise WalkerError, "can't get here" - - def print_stmt(self, nodelist): - # print ([ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ]) - items = [] - if len(nodelist) == 1: - start = 1 - dest = None - elif nodelist[1][0] == token.RIGHTSHIFT: - assert len(nodelist) == 3 \ - or nodelist[3][0] == token.COMMA - dest = self.com_node(nodelist[2]) - start = 4 - else: - dest = None - start = 1 - for i in range(start, len(nodelist), 2): - items.append(self.com_node(nodelist[i])) - if nodelist[-1][0] == token.COMMA: - return Print(items, dest, lineno=nodelist[0][2]) - return Printnl(items, dest, lineno=nodelist[0][2]) - - def del_stmt(self, nodelist): - return self.com_assign(nodelist[1], OP_DELETE) - - def pass_stmt(self, nodelist): - return Pass(lineno=nodelist[0][2]) - - def break_stmt(self, nodelist): - return Break(lineno=nodelist[0][2]) - - def continue_stmt(self, nodelist): - return Continue(lineno=nodelist[0][2]) - - def return_stmt(self, nodelist): - # return: [testlist] - if len(nodelist) < 2: - return Return(Const(None), lineno=nodelist[0][2]) - return Return(self.com_node(nodelist[1]), lineno=nodelist[0][2]) - - def yield_stmt(self, nodelist): - expr = self.com_node(nodelist[0]) - return Discard(expr, 
lineno=expr.lineno) - - def yield_expr(self, nodelist): - if len(nodelist) > 1: - value = self.com_node(nodelist[1]) - else: - value = Const(None) - return Yield(value, lineno=nodelist[0][2]) - - def raise_stmt(self, nodelist): - # raise: [test [',' test [',' test]]] - if len(nodelist) > 5: - expr3 = self.com_node(nodelist[5]) - else: - expr3 = None - if len(nodelist) > 3: - expr2 = self.com_node(nodelist[3]) - else: - expr2 = None - if len(nodelist) > 1: - expr1 = self.com_node(nodelist[1]) - else: - expr1 = None - return Raise(expr1, expr2, expr3, lineno=nodelist[0][2]) - - def import_stmt(self, nodelist): - # import_stmt: import_name | import_from - assert len(nodelist) == 1 - return self.com_node(nodelist[0]) - - def import_name(self, nodelist): - # import_name: 'import' dotted_as_names - return Import(self.com_dotted_as_names(nodelist[1]), - lineno=nodelist[0][2]) - - def import_from(self, nodelist): - # import_from: 'from' ('.'* dotted_name | '.') 'import' ('*' | - # '(' import_as_names ')' | import_as_names) - assert nodelist[0][1] == 'from' - idx = 1 - while nodelist[idx][1] == '.': - idx += 1 - level = idx - 1 - if nodelist[idx][0] == symbol.dotted_name: - fromname = self.com_dotted_name(nodelist[idx]) - idx += 1 - else: - fromname = "" - assert nodelist[idx][1] == 'import' - if nodelist[idx + 1][0] == token.STAR: - return From(fromname, [('*', None)], level, - lineno=nodelist[0][2]) - else: - node = nodelist[idx + 1 + (nodelist[idx + 1][0] == token.LPAR)] - return From(fromname, self.com_import_as_names(node), level, - lineno=nodelist[0][2]) - - def global_stmt(self, nodelist): - # global: NAME (',' NAME)* - names = [] - for i in range(1, len(nodelist), 2): - names.append(nodelist[i][1]) - return Global(names, lineno=nodelist[0][2]) - - def exec_stmt(self, nodelist): - # exec_stmt: 'exec' expr ['in' expr [',' expr]] - expr1 = self.com_node(nodelist[1]) - if len(nodelist) >= 4: - expr2 = self.com_node(nodelist[3]) - if len(nodelist) >= 6: - expr3 = 
self.com_node(nodelist[5]) - else: - expr3 = None - else: - expr2 = expr3 = None - - return Exec(expr1, expr2, expr3, lineno=nodelist[0][2]) - - def assert_stmt(self, nodelist): - # 'assert': test, [',' test] - expr1 = self.com_node(nodelist[1]) - if (len(nodelist) == 4): - expr2 = self.com_node(nodelist[3]) - else: - expr2 = None - return Assert(expr1, expr2, lineno=nodelist[0][2]) - - def if_stmt(self, nodelist): - # if: test ':' suite ('elif' test ':' suite)* ['else' ':' suite] - tests = [] - for i in range(0, len(nodelist) - 3, 4): - testNode = self.com_node(nodelist[i + 1]) - suiteNode = self.com_node(nodelist[i + 3]) - tests.append((testNode, suiteNode)) - - if len(nodelist) % 4 == 3: - elseNode = self.com_node(nodelist[-1]) -## elseNode.lineno = nodelist[-1][1][2] - else: - elseNode = None - return If(tests, elseNode, lineno=nodelist[0][2]) - - def while_stmt(self, nodelist): - # 'while' test ':' suite ['else' ':' suite] - - testNode = self.com_node(nodelist[1]) - bodyNode = self.com_node(nodelist[3]) - - if len(nodelist) > 4: - elseNode = self.com_node(nodelist[6]) - else: - elseNode = None - - return While(testNode, bodyNode, elseNode, lineno=nodelist[0][2]) - - def for_stmt(self, nodelist): - # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] - - assignNode = self.com_assign(nodelist[1], OP_ASSIGN) - listNode = self.com_node(nodelist[3]) - bodyNode = self.com_node(nodelist[5]) - - if len(nodelist) > 8: - elseNode = self.com_node(nodelist[8]) - else: - elseNode = None - - return For(assignNode, listNode, bodyNode, elseNode, - lineno=nodelist[0][2]) - - def try_stmt(self, nodelist): - return self.com_try_except_finally(nodelist) - - def with_stmt(self, nodelist): - return self.com_with(nodelist) - - def with_var(self, nodelist): - return self.com_with_var(nodelist) - - def suite(self, nodelist): - # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT - if len(nodelist) == 1: - return self.com_stmt(nodelist[0]) - - stmts = [] - for node in 
nodelist: - if node[0] == symbol.stmt: - self.com_append_stmt(stmts, node) - return Stmt(stmts) - - # -------------------------------------------------------------- - # - # EXPRESSION NODES (invoked by com_node()) - # - - def testlist(self, nodelist): - # testlist: expr (',' expr)* [','] - # testlist_safe: test [(',' test)+ [',']] - # exprlist: expr (',' expr)* [','] - return self.com_binary(Tuple, nodelist) - - testlist_safe = testlist # XXX - testlist1 = testlist - exprlist = testlist - - def testlist_comp(self, nodelist): - # test ( comp_for | (',' test)* [','] ) - assert nodelist[0][0] == symbol.test - if len(nodelist) == 2 and nodelist[1][0] == symbol.comp_for: - test = self.com_node(nodelist[0]) - return self.com_generator_expression(test, nodelist[1]) - return self.testlist(nodelist) - - def test(self, nodelist): - # or_test ['if' or_test 'else' test] | lambdef - if len(nodelist) == 1 and nodelist[0][0] == symbol.lambdef: - return self.lambdef(nodelist[0]) - then = self.com_node(nodelist[0]) - if len(nodelist) > 1: - assert len(nodelist) == 5 - assert nodelist[1][1] == 'if' - assert nodelist[3][1] == 'else' - test = self.com_node(nodelist[2]) - else_ = self.com_node(nodelist[4]) - return IfExp(test, then, else_, lineno=nodelist[1][2]) - return then - - def or_test(self, nodelist): - # and_test ('or' and_test)* | lambdef - if len(nodelist) == 1 and nodelist[0][0] == symbol.lambdef: - return self.lambdef(nodelist[0]) - return self.com_binary(Or, nodelist) - old_test = or_test - - def and_test(self, nodelist): - # not_test ('and' not_test)* - return self.com_binary(And, nodelist) - - def not_test(self, nodelist): - # 'not' not_test | comparison - result = self.com_node(nodelist[-1]) - if len(nodelist) == 2: - return Not(result, lineno=nodelist[0][2]) - return result - - def comparison(self, nodelist): - # comparison: expr (comp_op expr)* - node = self.com_node(nodelist[0]) - if len(nodelist) == 1: - return node - - results = [] - for i in range(2, 
len(nodelist), 2): - nl = nodelist[i-1] - - # comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '==' - # | 'in' | 'not' 'in' | 'is' | 'is' 'not' - n = nl[1] - if n[0] == token.NAME: - type = n[1] - if len(nl) == 3: - if type == 'not': - type = 'not in' - else: - type = 'is not' - else: - type = _cmp_types[n[0]] - - lineno = nl[1][2] - results.append((type, self.com_node(nodelist[i]))) - - # we need a special "compare" node so that we can distinguish - # 3 < x < 5 from (3 < x) < 5 - # the two have very different semantics and results (note that the - # latter form is always true) - - return Compare(node, results, lineno=lineno) - - def expr(self, nodelist): - # xor_expr ('|' xor_expr)* - return self.com_binary(Bitor, nodelist) - - def xor_expr(self, nodelist): - # xor_expr ('^' xor_expr)* - return self.com_binary(Bitxor, nodelist) - - def and_expr(self, nodelist): - # xor_expr ('&' xor_expr)* - return self.com_binary(Bitand, nodelist) - - def shift_expr(self, nodelist): - # shift_expr ('<<'|'>>' shift_expr)* - node = self.com_node(nodelist[0]) - for i in range(2, len(nodelist), 2): - right = self.com_node(nodelist[i]) - if nodelist[i-1][0] == token.LEFTSHIFT: - node = LeftShift([node, right], lineno=nodelist[1][2]) - elif nodelist[i-1][0] == token.RIGHTSHIFT: - node = RightShift([node, right], lineno=nodelist[1][2]) - else: - raise ValueError, "unexpected token: %s" % nodelist[i-1][0] - return node - - def arith_expr(self, nodelist): - node = self.com_node(nodelist[0]) - for i in range(2, len(nodelist), 2): - right = self.com_node(nodelist[i]) - if nodelist[i-1][0] == token.PLUS: - node = Add([node, right], lineno=nodelist[1][2]) - elif nodelist[i-1][0] == token.MINUS: - node = Sub([node, right], lineno=nodelist[1][2]) - else: - raise ValueError, "unexpected token: %s" % nodelist[i-1][0] - return node - - def term(self, nodelist): - node = self.com_node(nodelist[0]) - for i in range(2, len(nodelist), 2): - right = self.com_node(nodelist[i]) - t = 
nodelist[i-1][0] - if t == token.STAR: - node = Mul([node, right]) - elif t == token.SLASH: - node = Div([node, right]) - elif t == token.PERCENT: - node = Mod([node, right]) - elif t == token.DOUBLESLASH: - node = FloorDiv([node, right]) - else: - raise ValueError, "unexpected token: %s" % t - node.lineno = nodelist[1][2] - return node - - def factor(self, nodelist): - elt = nodelist[0] - t = elt[0] - node = self.lookup_node(nodelist[-1])(nodelist[-1][1:]) - # need to handle (unary op)constant here... - if t == token.PLUS: - return UnaryAdd(node, lineno=elt[2]) - elif t == token.MINUS: - return UnarySub(node, lineno=elt[2]) - elif t == token.TILDE: - node = Invert(node, lineno=elt[2]) - return node - - def power(self, nodelist): - # power: atom trailer* ('**' factor)* - node = self.com_node(nodelist[0]) - for i in range(1, len(nodelist)): - elt = nodelist[i] - if elt[0] == token.DOUBLESTAR: - return Power([node, self.com_node(nodelist[i+1])], - lineno=elt[2]) - - node = self.com_apply_trailer(node, elt) - - return node - - def atom(self, nodelist): - return self._atom_dispatch[nodelist[0][0]](nodelist) - - def atom_lpar(self, nodelist): - if nodelist[1][0] == token.RPAR: - return Tuple((), lineno=nodelist[0][2]) - return self.com_node(nodelist[1]) - - def atom_lsqb(self, nodelist): - if nodelist[1][0] == token.RSQB: - return List((), lineno=nodelist[0][2]) - return self.com_list_constructor(nodelist[1]) - - def atom_lbrace(self, nodelist): - if nodelist[1][0] == token.RBRACE: - return Dict((), lineno=nodelist[0][2]) - return self.com_dictorsetmaker(nodelist[1]) - - def atom_backquote(self, nodelist): - return Backquote(self.com_node(nodelist[1])) - - def atom_number(self, nodelist): - ### need to verify this matches compile.c - k = eval(nodelist[0][1]) - return Const(k, lineno=nodelist[0][2]) - - def decode_literal(self, lit): - if self.encoding: - # this is particularly fragile & a bit of a - # hack... 
changes in compile.c:parsestr and - # tokenizer.c must be reflected here. - if self.encoding not in ['utf-8', 'iso-8859-1']: - lit = unicode(lit, 'utf-8').encode(self.encoding) - return eval("# coding: %s\n%s" % (self.encoding, lit)) - else: - return eval(lit) - - def atom_string(self, nodelist): - k = '' - for node in nodelist: - k += self.decode_literal(node[1]) - return Const(k, lineno=nodelist[0][2]) - - def atom_name(self, nodelist): - return Name(nodelist[0][1], lineno=nodelist[0][2]) - - # -------------------------------------------------------------- - # - # INTERNAL PARSING UTILITIES - # - - # The use of com_node() introduces a lot of extra stack frames, - # enough to cause a stack overflow compiling test.test_parser with - # the standard interpreter recursionlimit. The com_node() is a - # convenience function that hides the dispatch details, but comes - # at a very high cost. It is more efficient to dispatch directly - # in the callers. In these cases, use lookup_node() and call the - # dispatched node directly. - - def lookup_node(self, node): - return self._dispatch[node[0]] - - def com_node(self, node): - # Note: compile.c has handling in com_node for del_stmt, pass_stmt, - # break_stmt, stmt, small_stmt, flow_stmt, simple_stmt, - # and compound_stmt. - # We'll just dispatch them. - return self._dispatch[node[0]](node[1:]) - - def com_NEWLINE(self, *args): - # A ';' at the end of a line can make a NEWLINE token appear - # here, Render it harmless. 
(genc discards ('discard', - # ('const', xxxx)) Nodes) - return Discard(Const(None)) - - def com_arglist(self, nodelist): - # varargslist: - # (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) - # | fpdef ['=' test] (',' fpdef ['=' test])* [','] - # fpdef: NAME | '(' fplist ')' - # fplist: fpdef (',' fpdef)* [','] - names = [] - defaults = [] - flags = 0 - - i = 0 - while i < len(nodelist): - node = nodelist[i] - if node[0] == token.STAR or node[0] == token.DOUBLESTAR: - if node[0] == token.STAR: - node = nodelist[i+1] - if node[0] == token.NAME: - names.append(node[1]) - flags = flags | CO_VARARGS - i = i + 3 - - if i < len(nodelist): - # should be DOUBLESTAR - t = nodelist[i][0] - if t == token.DOUBLESTAR: - node = nodelist[i+1] - else: - raise ValueError, "unexpected token: %s" % t - names.append(node[1]) - flags = flags | CO_VARKEYWORDS - - break - - # fpdef: NAME | '(' fplist ')' - names.append(self.com_fpdef(node)) - - i = i + 1 - if i < len(nodelist) and nodelist[i][0] == token.EQUAL: - defaults.append(self.com_node(nodelist[i + 1])) - i = i + 2 - elif len(defaults): - # we have already seen an argument with default, but here - # came one without - raise SyntaxError, "non-default argument follows default argument" - - # skip the comma - i = i + 1 - - return names, defaults, flags - - def com_fpdef(self, node): - # fpdef: NAME | '(' fplist ')' - if node[1][0] == token.LPAR: - return self.com_fplist(node[2]) - return node[1][1] - - def com_fplist(self, node): - # fplist: fpdef (',' fpdef)* [','] - if len(node) == 2: - return self.com_fpdef(node[1]) - list = [] - for i in range(1, len(node), 2): - list.append(self.com_fpdef(node[i])) - return tuple(list) - - def com_dotted_name(self, node): - # String together the dotted names and return the string - name = "" - for n in node: - if type(n) == type(()) and n[0] == 1: - name = name + n[1] + '.' 
- return name[:-1] - - def com_dotted_as_name(self, node): - assert node[0] == symbol.dotted_as_name - node = node[1:] - dot = self.com_dotted_name(node[0][1:]) - if len(node) == 1: - return dot, None - assert node[1][1] == 'as' - assert node[2][0] == token.NAME - return dot, node[2][1] - - def com_dotted_as_names(self, node): - assert node[0] == symbol.dotted_as_names - node = node[1:] - names = [self.com_dotted_as_name(node[0])] - for i in range(2, len(node), 2): - names.append(self.com_dotted_as_name(node[i])) - return names - - def com_import_as_name(self, node): - assert node[0] == symbol.import_as_name - node = node[1:] - assert node[0][0] == token.NAME - if len(node) == 1: - return node[0][1], None - assert node[1][1] == 'as', node - assert node[2][0] == token.NAME - return node[0][1], node[2][1] - - def com_import_as_names(self, node): - assert node[0] == symbol.import_as_names - node = node[1:] - names = [self.com_import_as_name(node[0])] - for i in range(2, len(node), 2): - names.append(self.com_import_as_name(node[i])) - return names - - def com_bases(self, node): - bases = [] - for i in range(1, len(node), 2): - bases.append(self.com_node(node[i])) - return bases - - def com_try_except_finally(self, nodelist): - # ('try' ':' suite - # ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] - # | 'finally' ':' suite)) - - if nodelist[3][0] == token.NAME: - # first clause is a finally clause: only try-finally - return TryFinally(self.com_node(nodelist[2]), - self.com_node(nodelist[5]), - lineno=nodelist[0][2]) - - #tryexcept: [TryNode, [except_clauses], elseNode)] - clauses = [] - elseNode = None - finallyNode = None - for i in range(3, len(nodelist), 3): - node = nodelist[i] - if node[0] == symbol.except_clause: - # except_clause: 'except' [expr [(',' | 'as') expr]] */ - if len(node) > 2: - expr1 = self.com_node(node[2]) - if len(node) > 4: - expr2 = self.com_assign(node[4], OP_ASSIGN) - else: - expr2 = None - else: - expr1 = expr2 = None - 
clauses.append((expr1, expr2, self.com_node(nodelist[i+2]))) - - if node[0] == token.NAME: - if node[1] == 'else': - elseNode = self.com_node(nodelist[i+2]) - elif node[1] == 'finally': - finallyNode = self.com_node(nodelist[i+2]) - try_except = TryExcept(self.com_node(nodelist[2]), clauses, elseNode, - lineno=nodelist[0][2]) - if finallyNode: - return TryFinally(try_except, finallyNode, lineno=nodelist[0][2]) - else: - return try_except - - def com_with(self, nodelist): - # with_stmt: 'with' with_item (',' with_item)* ':' suite - body = self.com_node(nodelist[-1]) - for i in range(len(nodelist) - 3, 0, -2): - ret = self.com_with_item(nodelist[i], body, nodelist[0][2]) - if i == 1: - return ret - body = ret - - def com_with_item(self, nodelist, body, lineno): - # with_item: test ['as' expr] - if len(nodelist) == 4: - var = self.com_assign(nodelist[3], OP_ASSIGN) - else: - var = None - expr = self.com_node(nodelist[1]) - return With(expr, var, body, lineno=lineno) - - def com_augassign_op(self, node): - assert node[0] == symbol.augassign - return node[1] - - def com_augassign(self, node): - """Return node suitable for lvalue of augmented assignment - - Names, slices, and attributes are the only allowable nodes. 
- """ - l = self.com_node(node) - if l.__class__ in (Name, Slice, Subscript, Getattr): - return l - raise SyntaxError, "can't assign to %s" % l.__class__.__name__ - - def com_assign(self, node, assigning): - # return a node suitable for use as an "lvalue" - # loop to avoid trivial recursion - while 1: - t = node[0] - if t in (symbol.exprlist, symbol.testlist, symbol.testlist_safe, symbol.testlist_comp): - if len(node) > 2: - return self.com_assign_tuple(node, assigning) - node = node[1] - elif t in _assign_types: - if len(node) > 2: - raise SyntaxError, "can't assign to operator" - node = node[1] - elif t == symbol.power: - if node[1][0] != symbol.atom: - raise SyntaxError, "can't assign to operator" - if len(node) > 2: - primary = self.com_node(node[1]) - for i in range(2, len(node)-1): - ch = node[i] - if ch[0] == token.DOUBLESTAR: - raise SyntaxError, "can't assign to operator" - primary = self.com_apply_trailer(primary, ch) - return self.com_assign_trailer(primary, node[-1], - assigning) - node = node[1] - elif t == symbol.atom: - t = node[1][0] - if t == token.LPAR: - node = node[2] - if node[0] == token.RPAR: - raise SyntaxError, "can't assign to ()" - elif t == token.LSQB: - node = node[2] - if node[0] == token.RSQB: - raise SyntaxError, "can't assign to []" - return self.com_assign_list(node, assigning) - elif t == token.NAME: - return self.com_assign_name(node[1], assigning) - else: - raise SyntaxError, "can't assign to literal" - else: - raise SyntaxError, "bad assignment (%s)" % t - - def com_assign_tuple(self, node, assigning): - assigns = [] - for i in range(1, len(node), 2): - assigns.append(self.com_assign(node[i], assigning)) - return AssTuple(assigns, lineno=extractLineNo(node)) - - def com_assign_list(self, node, assigning): - assigns = [] - for i in range(1, len(node), 2): - if i + 1 < len(node): - if node[i + 1][0] == symbol.list_for: - raise SyntaxError, "can't assign to list comprehension" - assert node[i + 1][0] == token.COMMA, node[i + 1] - 
assigns.append(self.com_assign(node[i], assigning)) - return AssList(assigns, lineno=extractLineNo(node)) - - def com_assign_name(self, node, assigning): - return AssName(node[1], assigning, lineno=node[2]) - - def com_assign_trailer(self, primary, node, assigning): - t = node[1][0] - if t == token.DOT: - return self.com_assign_attr(primary, node[2], assigning) - if t == token.LSQB: - return self.com_subscriptlist(primary, node[2], assigning) - if t == token.LPAR: - raise SyntaxError, "can't assign to function call" - raise SyntaxError, "unknown trailer type: %s" % t - - def com_assign_attr(self, primary, node, assigning): - return AssAttr(primary, node[1], assigning, lineno=node[-1]) - - def com_binary(self, constructor, nodelist): - "Compile 'NODE (OP NODE)*' into (type, [ node1, ..., nodeN ])." - l = len(nodelist) - if l == 1: - n = nodelist[0] - return self.lookup_node(n)(n[1:]) - items = [] - for i in range(0, l, 2): - n = nodelist[i] - items.append(self.lookup_node(n)(n[1:])) - return constructor(items, lineno=extractLineNo(nodelist)) - - def com_stmt(self, node): - result = self.lookup_node(node)(node[1:]) - assert result is not None - if isinstance(result, Stmt): - return result - return Stmt([result]) - - def com_append_stmt(self, stmts, node): - result = self.lookup_node(node)(node[1:]) - assert result is not None - if isinstance(result, Stmt): - stmts.extend(result.nodes) - else: - stmts.append(result) - - def com_list_constructor(self, nodelist): - # listmaker: test ( list_for | (',' test)* [','] ) - values = [] - for i in range(1, len(nodelist)): - if nodelist[i][0] == symbol.list_for: - assert len(nodelist[i:]) == 1 - return self.com_list_comprehension(values[0], - nodelist[i]) - elif nodelist[i][0] == token.COMMA: - continue - values.append(self.com_node(nodelist[i])) - return List(values, lineno=values[0].lineno) - - def com_list_comprehension(self, expr, node): - return self.com_comprehension(expr, None, node, 'list') - - def 
com_comprehension(self, expr1, expr2, node, type): - # list_iter: list_for | list_if - # list_for: 'for' exprlist 'in' testlist [list_iter] - # list_if: 'if' test [list_iter] - - # XXX should raise SyntaxError for assignment - # XXX(avassalotti) Set and dict comprehensions should have generator - # semantics. In other words, they shouldn't leak - # variables outside of the comprehension's scope. - - lineno = node[1][2] - fors = [] - while node: - t = node[1][1] - if t == 'for': - assignNode = self.com_assign(node[2], OP_ASSIGN) - compNode = self.com_node(node[4]) - newfor = ListCompFor(assignNode, compNode, []) - newfor.lineno = node[1][2] - fors.append(newfor) - if len(node) == 5: - node = None - elif type == 'list': - node = self.com_list_iter(node[5]) - else: - node = self.com_comp_iter(node[5]) - elif t == 'if': - test = self.com_node(node[2]) - newif = ListCompIf(test, lineno=node[1][2]) - newfor.ifs.append(newif) - if len(node) == 3: - node = None - elif type == 'list': - node = self.com_list_iter(node[3]) - else: - node = self.com_comp_iter(node[3]) - else: - raise SyntaxError, \ - ("unexpected comprehension element: %s %d" - % (node, lineno)) - if type == 'list': - return ListComp(expr1, fors, lineno=lineno) - elif type == 'set': - return SetComp(expr1, fors, lineno=lineno) - elif type == 'dict': - return DictComp(expr1, expr2, fors, lineno=lineno) - else: - raise ValueError("unexpected comprehension type: " + repr(type)) - - def com_list_iter(self, node): - assert node[0] == symbol.list_iter - return node[1] - - def com_comp_iter(self, node): - assert node[0] == symbol.comp_iter - return node[1] - - def com_generator_expression(self, expr, node): - # comp_iter: comp_for | comp_if - # comp_for: 'for' exprlist 'in' test [comp_iter] - # comp_if: 'if' test [comp_iter] - - lineno = node[1][2] - fors = [] - while node: - t = node[1][1] - if t == 'for': - assignNode = self.com_assign(node[2], OP_ASSIGN) - genNode = self.com_node(node[4]) - newfor = 
GenExprFor(assignNode, genNode, [], - lineno=node[1][2]) - fors.append(newfor) - if (len(node)) == 5: - node = None - else: - node = self.com_comp_iter(node[5]) - elif t == 'if': - test = self.com_node(node[2]) - newif = GenExprIf(test, lineno=node[1][2]) - newfor.ifs.append(newif) - if len(node) == 3: - node = None - else: - node = self.com_comp_iter(node[3]) - else: - raise SyntaxError, \ - ("unexpected generator expression element: %s %d" - % (node, lineno)) - fors[0].is_outmost = True - return GenExpr(GenExprInner(expr, fors), lineno=lineno) - - def com_dictorsetmaker(self, nodelist): - # dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | - # (test (comp_for | (',' test)* [','])) ) - assert nodelist[0] == symbol.dictorsetmaker - nodelist = nodelist[1:] - if len(nodelist) == 1 or nodelist[1][0] == token.COMMA: - # set literal - items = [] - for i in range(0, len(nodelist), 2): - items.append(self.com_node(nodelist[i])) - return Set(items, lineno=items[0].lineno) - elif nodelist[1][0] == symbol.comp_for: - # set comprehension - expr = self.com_node(nodelist[0]) - return self.com_comprehension(expr, None, nodelist[1], 'set') - elif len(nodelist) > 3 and nodelist[3][0] == symbol.comp_for: - # dict comprehension - assert nodelist[1][0] == token.COLON - key = self.com_node(nodelist[0]) - value = self.com_node(nodelist[2]) - return self.com_comprehension(key, value, nodelist[3], 'dict') - else: - # dict literal - items = [] - for i in range(0, len(nodelist), 4): - items.append((self.com_node(nodelist[i]), - self.com_node(nodelist[i+2]))) - return Dict(items, lineno=items[0][0].lineno) - - def com_apply_trailer(self, primaryNode, nodelist): - t = nodelist[1][0] - if t == token.LPAR: - return self.com_call_function(primaryNode, nodelist[2]) - if t == token.DOT: - return self.com_select_member(primaryNode, nodelist[2]) - if t == token.LSQB: - return self.com_subscriptlist(primaryNode, nodelist[2], OP_APPLY) - - raise SyntaxError, 'unknown node 
type: %s' % t - - def com_select_member(self, primaryNode, nodelist): - if nodelist[0] != token.NAME: - raise SyntaxError, "member must be a name" - return Getattr(primaryNode, nodelist[1], lineno=nodelist[2]) - - def com_call_function(self, primaryNode, nodelist): - if nodelist[0] == token.RPAR: - return CallFunc(primaryNode, [], lineno=extractLineNo(nodelist)) - args = [] - kw = 0 - star_node = dstar_node = None - len_nodelist = len(nodelist) - i = 1 - while i < len_nodelist: - node = nodelist[i] - - if node[0]==token.STAR: - if star_node is not None: - raise SyntaxError, 'already have the varargs indentifier' - star_node = self.com_node(nodelist[i+1]) - i = i + 3 - continue - elif node[0]==token.DOUBLESTAR: - if dstar_node is not None: - raise SyntaxError, 'already have the kwargs indentifier' - dstar_node = self.com_node(nodelist[i+1]) - i = i + 3 - continue - - # positional or named parameters - kw, result = self.com_argument(node, kw, star_node) - - if len_nodelist != 2 and isinstance(result, GenExpr) \ - and len(node) == 3 and node[2][0] == symbol.comp_for: - # allow f(x for x in y), but reject f(x for x in y, 1) - # should use f((x for x in y), 1) instead of f(x for x in y, 1) - raise SyntaxError, 'generator expression needs parenthesis' - - args.append(result) - i = i + 2 - - return CallFunc(primaryNode, args, star_node, dstar_node, - lineno=extractLineNo(nodelist)) - - def com_argument(self, nodelist, kw, star_node): - if len(nodelist) == 3 and nodelist[2][0] == symbol.comp_for: - test = self.com_node(nodelist[1]) - return 0, self.com_generator_expression(test, nodelist[2]) - if len(nodelist) == 2: - if kw: - raise SyntaxError, "non-keyword arg after keyword arg" - if star_node: - raise SyntaxError, "only named arguments may follow *expression" - return 0, self.com_node(nodelist[1]) - result = self.com_node(nodelist[3]) - n = nodelist[1] - while len(n) == 2 and n[0] != token.NAME: - n = n[1] - if n[0] != token.NAME: - raise SyntaxError, "keyword can't be 
an expression (%s)"%n[0] - node = Keyword(n[1], result, lineno=n[2]) - return 1, node - - def com_subscriptlist(self, primary, nodelist, assigning): - # slicing: simple_slicing | extended_slicing - # simple_slicing: primary "[" short_slice "]" - # extended_slicing: primary "[" slice_list "]" - # slice_list: slice_item ("," slice_item)* [","] - - # backwards compat slice for '[i:j]' - if len(nodelist) == 2: - sub = nodelist[1] - if (sub[1][0] == token.COLON or \ - (len(sub) > 2 and sub[2][0] == token.COLON)) and \ - sub[-1][0] != symbol.sliceop: - return self.com_slice(primary, sub, assigning) - - subscripts = [] - for i in range(1, len(nodelist), 2): - subscripts.append(self.com_subscript(nodelist[i])) - return Subscript(primary, assigning, subscripts, - lineno=extractLineNo(nodelist)) - - def com_subscript(self, node): - # slice_item: expression | proper_slice | ellipsis - ch = node[1] - t = ch[0] - if t == token.DOT and node[2][0] == token.DOT: - return Ellipsis() - if t == token.COLON or len(node) > 2: - return self.com_sliceobj(node) - return self.com_node(ch) - - def com_sliceobj(self, node): - # proper_slice: short_slice | long_slice - # short_slice: [lower_bound] ":" [upper_bound] - # long_slice: short_slice ":" [stride] - # lower_bound: expression - # upper_bound: expression - # stride: expression - # - # Note: a stride may be further slicing... - - items = [] - - if node[1][0] == token.COLON: - items.append(Const(None)) - i = 2 - else: - items.append(self.com_node(node[1])) - # i == 2 is a COLON - i = 3 - - if i < len(node) and node[i][0] == symbol.test: - items.append(self.com_node(node[i])) - i = i + 1 - else: - items.append(Const(None)) - - # a short_slice has been built. look for long_slice now by looking - # for strides... 
- for j in range(i, len(node)): - ch = node[j] - if len(ch) == 2: - items.append(Const(None)) - else: - items.append(self.com_node(ch[2])) - return Sliceobj(items, lineno=extractLineNo(node)) - - def com_slice(self, primary, node, assigning): - # short_slice: [lower_bound] ":" [upper_bound] - lower = upper = None - if len(node) == 3: - if node[1][0] == token.COLON: - upper = self.com_node(node[2]) - else: - lower = self.com_node(node[1]) - elif len(node) == 4: - lower = self.com_node(node[1]) - upper = self.com_node(node[3]) - return Slice(primary, assigning, lower, upper, - lineno=extractLineNo(node)) - - def get_docstring(self, node, n=None): - if n is None: - n = node[0] - node = node[1:] - if n == symbol.suite: - if len(node) == 1: - return self.get_docstring(node[0]) - for sub in node: - if sub[0] == symbol.stmt: - return self.get_docstring(sub) - return None - if n == symbol.file_input: - for sub in node: - if sub[0] == symbol.stmt: - return self.get_docstring(sub) - return None - if n == symbol.atom: - if node[0][0] == token.STRING: - s = '' - for t in node: - s = s + eval(t[1]) - return s - return None - if n == symbol.stmt or n == symbol.simple_stmt \ - or n == symbol.small_stmt: - return self.get_docstring(node[0]) - if n in _doc_nodes and len(node) == 1: - return self.get_docstring(node[0]) - return None - - -_doc_nodes = [ - symbol.expr_stmt, - symbol.testlist, - symbol.testlist_safe, - symbol.test, - symbol.or_test, - symbol.and_test, - symbol.not_test, - symbol.comparison, - symbol.expr, - symbol.xor_expr, - symbol.and_expr, - symbol.shift_expr, - symbol.arith_expr, - symbol.term, - symbol.factor, - symbol.power, - ] - -# comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '==' -# | 'in' | 'not' 'in' | 'is' | 'is' 'not' -_cmp_types = { - token.LESS : '<', - token.GREATER : '>', - token.EQEQUAL : '==', - token.EQUAL : '==', - token.LESSEQUAL : '<=', - token.GREATEREQUAL : '>=', - token.NOTEQUAL : '!=', - } - -_legal_node_types = [ - symbol.funcdef, 
- symbol.classdef, - symbol.stmt, - symbol.small_stmt, - symbol.flow_stmt, - symbol.simple_stmt, - symbol.compound_stmt, - symbol.expr_stmt, - symbol.print_stmt, - symbol.del_stmt, - symbol.pass_stmt, - symbol.break_stmt, - symbol.continue_stmt, - symbol.return_stmt, - symbol.raise_stmt, - symbol.import_stmt, - symbol.global_stmt, - symbol.exec_stmt, - symbol.assert_stmt, - symbol.if_stmt, - symbol.while_stmt, - symbol.for_stmt, - symbol.try_stmt, - symbol.with_stmt, - symbol.suite, - symbol.testlist, - symbol.testlist_safe, - symbol.test, - symbol.and_test, - symbol.not_test, - symbol.comparison, - symbol.exprlist, - symbol.expr, - symbol.xor_expr, - symbol.and_expr, - symbol.shift_expr, - symbol.arith_expr, - symbol.term, - symbol.factor, - symbol.power, - symbol.atom, - ] - -if hasattr(symbol, 'yield_stmt'): - _legal_node_types.append(symbol.yield_stmt) -if hasattr(symbol, 'yield_expr'): - _legal_node_types.append(symbol.yield_expr) - -_assign_types = [ - symbol.test, - symbol.or_test, - symbol.and_test, - symbol.not_test, - symbol.comparison, - symbol.expr, - symbol.xor_expr, - symbol.and_expr, - symbol.shift_expr, - symbol.arith_expr, - symbol.term, - symbol.factor, - ] - -_names = {} -for k, v in symbol.sym_name.items(): - _names[k] = v -for k, v in token.tok_name.items(): - _names[k] = v - -def debug_tree(tree): - l = [] - for elt in tree: - if isinstance(elt, int): - l.append(_names.get(elt, elt)) - elif isinstance(elt, str): - l.append(elt) - else: - l.append(debug_tree(elt)) - return l diff --git a/python/Lib/compiler/visitor.py b/python/Lib/compiler/visitor.py deleted file mode 100755 index f10f56011a..0000000000 --- a/python/Lib/compiler/visitor.py +++ /dev/null @@ -1,113 +0,0 @@ -from compiler import ast - -# XXX should probably rename ASTVisitor to ASTWalker -# XXX can it be made even more generic? 
- -class ASTVisitor: - """Performs a depth-first walk of the AST - - The ASTVisitor will walk the AST, performing either a preorder or - postorder traversal depending on which method is called. - - methods: - preorder(tree, visitor) - postorder(tree, visitor) - tree: an instance of ast.Node - visitor: an instance with visitXXX methods - - The ASTVisitor is responsible for walking over the tree in the - correct order. For each node, it checks the visitor argument for - a method named 'visitNodeType' where NodeType is the name of the - node's class, e.g. Class. If the method exists, it is called - with the node as its sole argument. - - The visitor method for a particular node type can control how - child nodes are visited during a preorder walk. (It can't control - the order during a postorder walk, because it is called _after_ - the walk has occurred.) The ASTVisitor modifies the visitor - argument by adding a visit method to the visitor; this method can - be used to visit a child node of arbitrary type. - """ - - VERBOSE = 0 - - def __init__(self): - self.node = None - self._cache = {} - - def default(self, node, *args): - for child in node.getChildNodes(): - self.dispatch(child, *args) - - def dispatch(self, node, *args): - self.node = node - klass = node.__class__ - meth = self._cache.get(klass, None) - if meth is None: - className = klass.__name__ - meth = getattr(self.visitor, 'visit' + className, self.default) - self._cache[klass] = meth -## if self.VERBOSE > 0: -## className = klass.__name__ -## if self.VERBOSE == 1: -## if meth == 0: -## print "dispatch", className -## else: -## print "dispatch", className, (meth and meth.__name__ or '') - return meth(node, *args) - - def preorder(self, tree, visitor, *args): - """Do preorder walk of tree using visitor""" - self.visitor = visitor - visitor.visit = self.dispatch - self.dispatch(tree, *args) # XXX *args make sense? 
- -class ExampleASTVisitor(ASTVisitor): - """Prints examples of the nodes that aren't visited - - This visitor-driver is only useful for development, when it's - helpful to develop a visitor incrementally, and get feedback on what - you still have to do. - """ - examples = {} - - def dispatch(self, node, *args): - self.node = node - meth = self._cache.get(node.__class__, None) - className = node.__class__.__name__ - if meth is None: - meth = getattr(self.visitor, 'visit' + className, 0) - self._cache[node.__class__] = meth - if self.VERBOSE > 1: - print "dispatch", className, (meth and meth.__name__ or '') - if meth: - meth(node, *args) - elif self.VERBOSE > 0: - klass = node.__class__ - if klass not in self.examples: - self.examples[klass] = klass - print - print self.visitor - print klass - for attr in dir(node): - if attr[0] != '_': - print "\t", "%-12.12s" % attr, getattr(node, attr) - print - return self.default(node, *args) - -# XXX this is an API change - -_walker = ASTVisitor -def walk(tree, visitor, walker=None, verbose=None): - if walker is None: - walker = _walker() - if verbose is not None: - walker.VERBOSE = verbose - walker.preorder(tree, visitor) - return walker.visitor - -def dumpNode(node): - print node.__class__ - for attr in dir(node): - if attr[0] != '_': - print "\t", "%-10.10s" % attr, getattr(node, attr) diff --git a/python/Lib/contextlib.py b/python/Lib/contextlib.py deleted file mode 100755 index f05205b01c..0000000000 --- a/python/Lib/contextlib.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Utilities for with-statement contexts. 
See PEP 343.""" - -import sys -from functools import wraps -from warnings import warn - -__all__ = ["contextmanager", "nested", "closing"] - -class GeneratorContextManager(object): - """Helper for @contextmanager decorator.""" - - def __init__(self, gen): - self.gen = gen - - def __enter__(self): - try: - return self.gen.next() - except StopIteration: - raise RuntimeError("generator didn't yield") - - def __exit__(self, type, value, traceback): - if type is None: - try: - self.gen.next() - except StopIteration: - return - else: - raise RuntimeError("generator didn't stop") - else: - if value is None: - # Need to force instantiation so we can reliably - # tell if we get the same exception back - value = type() - try: - self.gen.throw(type, value, traceback) - raise RuntimeError("generator didn't stop after throw()") - except StopIteration, exc: - # Suppress the exception *unless* it's the same exception that - # was passed to throw(). This prevents a StopIteration - # raised inside the "with" statement from being suppressed - return exc is not value - except: - # only re-raise if it's *not* the exception that was - # passed to throw(), because __exit__() must not raise - # an exception unless __exit__() itself failed. But throw() - # has to raise the exception to signal propagation, so this - # fixes the impedance mismatch between the throw() protocol - # and the __exit__() protocol. - # - if sys.exc_info()[1] is not value: - raise - - -def contextmanager(func): - """@contextmanager decorator. - - Typical usage: - - @contextmanager - def some_generator(): - - try: - yield - finally: - - - This makes this: - - with some_generator() as : - - - equivalent to this: - - - try: - = - - finally: - - - """ - @wraps(func) - def helper(*args, **kwds): - return GeneratorContextManager(func(*args, **kwds)) - return helper - - -@contextmanager -def nested(*managers): - """Combine multiple context managers into a single nested context manager. 
- - This function has been deprecated in favour of the multiple manager form - of the with statement. - - The one advantage of this function over the multiple manager form of the - with statement is that argument unpacking allows it to be - used with a variable number of context managers as follows: - - with nested(*managers): - do_something() - - """ - warn("With-statements now directly support multiple context managers", - DeprecationWarning, 3) - exits = [] - vars = [] - exc = (None, None, None) - try: - for mgr in managers: - exit = mgr.__exit__ - enter = mgr.__enter__ - vars.append(enter()) - exits.append(exit) - yield vars - except: - exc = sys.exc_info() - finally: - while exits: - exit = exits.pop() - try: - if exit(*exc): - exc = (None, None, None) - except: - exc = sys.exc_info() - if exc != (None, None, None): - # Don't rely on sys.exc_info() still containing - # the right information. Another exception may - # have been raised and caught by an exit method - raise exc[0], exc[1], exc[2] - - -class closing(object): - """Context to automatically close something at the end of a block. - - Code like this: - - with closing(.open()) as f: - - - is equivalent to this: - - f = .open() - try: - - finally: - f.close() - - """ - def __init__(self, thing): - self.thing = thing - def __enter__(self): - return self.thing - def __exit__(self, *exc_info): - self.thing.close() diff --git a/python/Lib/cookielib.py b/python/Lib/cookielib.py deleted file mode 100755 index 26380ba5e4..0000000000 --- a/python/Lib/cookielib.py +++ /dev/null @@ -1,1810 +0,0 @@ -r"""HTTP cookie handling for web clients. - -This module has (now fairly distant) origins in Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. - -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. 
- -Class diagram (note that BSDDBCookieJar and the MSIE* classes are not -distributed with the Python standard library, but are available from -http://wwwsearch.sf.net/): - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -""" - -__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', - 'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError', - 'MozillaCookieJar'] - -import re, urlparse, copy, time, urllib -try: - import threading as _threading -except ImportError: - import dummy_threading as _threading -import httplib # only for the default HTTP port -from calendar import timegm - -debug = False # set to True to enable debugging via the logging module -logger = None - -def _debug(*args): - if not debug: - return - global logger - if not logger: - import logging - logger = logging.getLogger("cookielib") - return logger.debug(*args) - - -DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT) -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") - -def _warn_unhandled_exception(): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. Warn if any - # exceptions are caught there. 
- import warnings, traceback, StringIO - f = StringIO.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2) - - -# Date/time conversion -# ----------------------------------------------------------------------------- - -EPOCH_YEAR = 1970 -def _timegm(tt): - year, month, mday, hour, min, sec = tt[:6] - if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and - (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): - return timegm(tt) - else: - return None - -DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -MONTHS_LOWER = [] -for month in MONTHS: MONTHS_LOWER.append(month.lower()) - -def time2isoz(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", - representing Universal Time (UTC, aka GMT). An example of this format is: - - 1994-11-24 08:49:37Z - - """ - if t is None: t = time.time() - year, mon, mday, hour, min, sec = time.gmtime(t)[:6] - return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( - year, mon, mday, hour, min, sec) - -def time2netscape(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. 
- - The format of the returned string is like this: - - Wed, DD-Mon-YYYY HH:MM:SS GMT - - """ - if t is None: t = time.time() - year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] - return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % ( - DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec) - - -UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} - -TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") -def offset_from_tz_string(tz): - offset = None - if tz in UTC_ZONES: - offset = 0 - else: - m = TIMEZONE_RE.search(tz) - if m: - offset = 3600 * int(m.group(2)) - if m.group(3): - offset = offset + 60 * int(m.group(3)) - if m.group(1) == '-': - offset = -offset - return offset - -def _str2time(day, mon, yr, hr, min, sec, tz): - # translate month name to number - # month numbers start with 1 (January) - try: - mon = MONTHS_LOWER.index(mon.lower())+1 - except ValueError: - # maybe it's already a number - try: - imon = int(mon) - except ValueError: - return None - if 1 <= imon <= 12: - mon = imon - else: - return None - - # make sure clock elements are defined - if hr is None: hr = 0 - if min is None: min = 0 - if sec is None: sec = 0 - - yr = int(yr) - day = int(day) - hr = int(hr) - min = int(min) - sec = int(sec) - - if yr < 1000: - # find "obvious" year - cur_yr = time.localtime(time.time())[0] - m = cur_yr % 100 - tmp = yr - yr = yr + cur_yr - m - m = m - tmp - if abs(m) > 50: - if m > 0: yr = yr + 100 - else: yr = yr - 100 - - # convert UTC time tuple to seconds since epoch (not timezone-adjusted) - t = _timegm((yr, mon, day, hr, min, sec, tz)) - - if t is not None: - # adjust time using timezone string, to get absolute time since epoch - if tz is None: - tz = "UTC" - tz = tz.upper() - offset = offset_from_tz_string(tz) - if offset is None: - return None - t = t - offset - - return t - -STRICT_DATE_RE = re.compile( - r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") -WEEKDAY_RE = re.compile( - 
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) -LOOSE_HTTP_DATE_RE = re.compile( - r"""^ - (\d\d?) # day - (?:\s+|[-\/]) - (\w+) # month - (?:\s+|[-\/]) - (\d+) # year - (?: - (?:\s+|:) # separator before clock - (\d\d?):(\d\d) # hour:min - (?::(\d\d))? # optional seconds - )? # optional clock - \s* - ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone - \s* - (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X) -def http2time(text): - """Returns time in seconds since epoch of time represented by a string. - - Return value is an integer. - - None is returned if the format of str is unrecognized, the time is outside - the representable range, or the timezone string is not recognized. If the - string contains no timezone, UTC is assumed. - - The timezone in the string may be numerical (like "-0800" or "+0100") or a - string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the - timezone strings equivalent to UTC (zero offset) are known to the function. - - The function loosely parses the following formats: - - Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format - Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format - Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format - 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) - 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) - 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) - - The parser ignores leading and trailing whitespace. The time may be - absent. - - If the year is given with only 2 digits, the function will select the - century that makes the year closest to the current date. - - """ - # fast exit for strictly conforming string - m = STRICT_DATE_RE.search(text) - if m: - g = m.groups() - mon = MONTHS_LOWER.index(g[1].lower()) + 1 - tt = (int(g[2]), mon, int(g[0]), - int(g[3]), int(g[4]), float(g[5])) - return _timegm(tt) - - # No, we need some messy parsing... 
- - # clean up - text = text.lstrip() - text = WEEKDAY_RE.sub("", text, 1) # Useless weekday - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = LOOSE_HTTP_DATE_RE.search(text) - if m is not None: - day, mon, yr, hr, min, sec, tz = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - -ISO_DATE_RE = re.compile( - """^ - (\d{4}) # year - [-\/]? - (\d\d?) # numerical month - [-\/]? - (\d\d?) # day - (?: - (?:\s+|[-:Tt]) # separator before clock - (\d\d?):?(\d\d) # hour:min - (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) - )? # optional clock - \s* - ([-+]?\d\d?:?(:?\d\d)? - |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X) -def iso2time(text): - """ - As for http2time, but parses the ISO 8601 formats: - - 1994-02-03 14:15:29 -0100 -- ISO 8601 format - 1994-02-03 14:15:29 -- zone is optional - 1994-02-03 -- only date - 1994-02-03T14:15:29 -- Use T as separator - 19940203T141529Z -- ISO 8601 compact format - 19940203 -- only date - - """ - # clean up - text = text.lstrip() - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = ISO_DATE_RE.search(text) - if m is not None: - # XXX there's an extra bit of the timezone I'm ignoring here: is - # this the right thing to do? 
- yr, mon, day, hr, min, sec, tz, _ = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - - -# Header parsing -# ----------------------------------------------------------------------------- - -def unmatched(match): - """Return unmatched part of re.Match object.""" - start, end = match.span(0) - return match.string[:start]+match.string[end:] - -HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") -HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") -HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") -HEADER_ESCAPE_RE = re.compile(r"\\(.)") -def split_header_words(header_values): - r"""Parse header values into a list of lists containing key,value pairs. - - The function knows how to deal with ",", ";" and "=" as well as quoted - values after "=". A list of space separated tokens are parsed as if they - were separated by ";". - - If the header_values passed as argument contains multiple values, then they - are treated as if they were a single value separated by comma ",". - - This means that this function is useful for parsing header fields that - follow this syntax (BNF as from the HTTP/1.1 specification, but we relax - the requirement for tokens). - - headers = #header - header = (token | parameter) *( [";"] (token | parameter)) - - token = 1* - separators = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" | "=" - | "{" | "}" | SP | HT - - quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - qdtext = > - quoted-pair = "\" CHAR - - parameter = attribute "=" value - attribute = token - value = token | quoted-string - - Each header is represented by a list of key/value pairs. The value for a - simple token (not part of a parameter) is None. Syntactically incorrect - headers will not necessarily be parsed as you would want. 
- - This is easier to describe with some examples: - - >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) - [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] - >>> split_header_words(['text/html; charset="iso-8859-1"']) - [[('text/html', None), ('charset', 'iso-8859-1')]] - >>> split_header_words([r'Basic realm="\"foo\bar\""']) - [[('Basic', None), ('realm', '"foobar"')]] - - """ - assert not isinstance(header_values, basestring) - result = [] - for text in header_values: - orig_text = text - pairs = [] - while text: - m = HEADER_TOKEN_RE.search(text) - if m: - text = unmatched(m) - name = m.group(1) - m = HEADER_QUOTED_VALUE_RE.search(text) - if m: # quoted value - text = unmatched(m) - value = m.group(1) - value = HEADER_ESCAPE_RE.sub(r"\1", value) - else: - m = HEADER_VALUE_RE.search(text) - if m: # unquoted value - text = unmatched(m) - value = m.group(1) - value = value.rstrip() - else: - # no value, a lone token - value = None - pairs.append((name, value)) - elif text.lstrip().startswith(","): - # concatenated headers, as per RFC 2616 section 4.2 - text = text.lstrip()[1:] - if pairs: result.append(pairs) - pairs = [] - else: - # skip junk - non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) - assert nr_junk_chars > 0, ( - "split_header_words bug: '%s', '%s', %s" % - (orig_text, text, pairs)) - text = non_junk - if pairs: result.append(pairs) - return result - -HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse (almost) of the conversion done by split_header_words. - - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. 
- - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) - 'text/plain; charset="iso-8859/1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) - 'text/plain, charset="iso-8859/1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) - -def _strip_quotes(text): - if text.startswith('"'): - text = text[1:] - if text.endswith('"'): - text = text[:-1] - return text - -def parse_ns_headers(ns_headers): - """Ad-hoc parser for Netscape protocol cookie-attributes. - - The old Netscape cookie format for Set-Cookie can for instance contain - an unquoted "," in the expires field, so we have to use this ad-hoc - parser instead of split_header_words. - - XXX This may not make the best possible effort to parse all the crap - that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient - parser is probably better, so could do worse than following that if - this ever gives any trouble. - - Currently, this is also used for parsing RFC 2109 cookies. - - """ - known_attrs = ("expires", "domain", "path", "secure", - # RFC 2109 attrs (may turn up in Netscape cookies, too) - "version", "port", "max-age") - - result = [] - for ns_header in ns_headers: - pairs = [] - version_set = False - - # XXX: The following does not strictly adhere to RFCs in that empty - # names and values are legal (the former will only appear once and will - # be overwritten if multiple occurrences are present). This is - # mostly to deal with backwards compatibility. 
- for ii, param in enumerate(ns_header.split(';')): - param = param.strip() - - key, sep, val = param.partition('=') - key = key.strip() - - if not key: - if ii == 0: - break - else: - continue - - # allow for a distinction between present and empty and missing - # altogether - val = val.strip() if sep else None - - if ii != 0: - lc = key.lower() - if lc in known_attrs: - key = lc - - if key == "version": - # This is an RFC 2109 cookie. - if val is not None: - val = _strip_quotes(val) - version_set = True - elif key == "expires": - # convert expires date to seconds since epoch - if val is not None: - val = http2time(_strip_quotes(val)) # None if invalid - pairs.append((key, val)) - - if pairs: - if not version_set: - pairs.append(("version", "0")) - result.append(pairs) - - return result - - -IPV4_RE = re.compile(r"\.\d+$") -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - if IPV4_RE.search(text): - return False - if text == "": - return False - if text[0] == "." or text[-1] == ".": - return False - return True - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) 
- - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. - A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - if i == -1 or i == 0: - # A does not have form NB, or N is the empty string - return False - if not B.startswith("."): - return False - if not is_HDN(B[1:]): - return False - return True - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - if IPV4_RE.search(text): - return False - return True - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$") -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. - - """ - url = request.get_full_url() - host = urlparse.urlparse(url)[1] - if host == "": - host = request.get_header("Host", "") - - # remove port, if present - host = cut_port_re.sub("", host, 1) - return host.lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name). - - As defined by RFC 2965, except both are lowercased. 
- - """ - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def request_path(request): - """Path component of request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - parts = urlparse.urlsplit(url) - path = escape_path(parts.path) - if not path.startswith("/"): - # fix bad RFC 2396 absoluteURI - path = "/" + path - return path - -def request_port(request): - host = request.get_host() - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - _debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). -HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - if isinstance(path, unicode): - path = path.encode("utf-8") - path = urllib.quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. 
- - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. - - """ - req_host = request_host(request) - if not domain_match(req_host, reach(request.get_origin_req_host())): - return True - else: - return False - - -class Cookie: - """HTTP Cookie. - - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. 
- - """ - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(expires) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. - self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return name in self._rest - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - - def is_expired(self, now=None): - if now is None: now = time.time() - if (self.expires is not None) and (self.expires <= now): - return True - return False - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "" % (namevalue, limit) - - def __repr__(self): - args = [] - for name in ("version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", 
"domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ): - attr = getattr(self, name) - args.append("%s=%s" % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "Cookie(%s)" % ", ".join(args) - - -class CookiePolicy: - """Defines which cookies get accepted from and returned to server. - - May also modify cookies, though this is probably a bad idea. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customised policy. - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. - - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server.""" - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. 
- """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies.""" - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """Constructor arguments should be passed as keyword arguments only.""" - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def 
set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override .set_ok(), be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). - - """ - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. 
- _debug(" Set-Cookie2 without version attribute (%s=%s)", - cookie.name, cookie.value) - return False - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request.is_unverifiable() and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. - if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - _debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - _debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - _debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - _debug(" domain %s is not in user allow-list", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host(request) - domain = cookie.domain - if self.strict_domain and (domain.count(".") >= 2): - # XXX This should probably be compared with the Konqueror - # (kcookiejar.cpp) and 
Mozilla implementations, but it's a - # losing battle. - i = domain.rfind(".") - j = domain.rfind(".", 0, i) - if j == 0: # domain like .foo.bar - tld = domain[i+1:] - sld = domain[j+1:i] - if sld.lower() in ("co", "ac", "com", "edu", "org", "net", - "gov", "mil", "int", "aero", "biz", "cat", "coop", - "info", "jobs", "mobi", "museum", "name", "pro", - "travel", "eu") and len(tld) == 2: - # domain like .co.uk - _debug(" country-code second level domain %s", domain) - return False - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - _debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - _debug(" effective request-host %s (even with added " - "initial dot) does not end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - _debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - _debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - _debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - _debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def 
return_ok(self, cookie, request): - """ - If you override .return_ok(), be sure to call this method. If it - returns false, so should your subclass (assuming your subclass wants to - be more strict about which cookies to return). - - """ - # Path has already been checked by .path_return_ok(), and domain - # blocking done by .domain_return_ok(). - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - for n in "version", "verifiability", "secure", "expires", "port", "domain": - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request.is_unverifiable() and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.get_type() != "https": - _debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - _debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - _debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def 
return_ok_domain(self, cookie, request): - req_host, erhn = eff_request_host(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - _debug(" cookie with unspecified domain does not string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - _debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - _debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - req_host, erhn = eff_request_host(request) - if not req_host.startswith("."): - req_host = "."+req_host - if not erhn.startswith("."): - erhn = "."+erhn - if not (req_host.endswith(domain) or erhn.endswith(domain)): - #_debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - _debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - _debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - _debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - _debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = adict.keys() - keys.sort() - return map(adict.get, keys) - -def deepvalues(mapping): - """Iterates over nested mapping, depth-first, in sorted order by key.""" - values = 
vals_sorted_by_key(mapping) - for obj in values: - mapping = False - try: - obj.items - except AttributeError: - pass - else: - mapping = True - for subobj in deepvalues(obj): - yield subobj - if not mapping: - yield obj - - -# Used as second parameter to dict.get() method, to distinguish absent -# dict key from one with a None value. -class Absent: pass - -class CookieJar: - """Collection of HTTP cookies. - - You may not need to know about this class: try - urllib2.build_opener(HTTPCookieProcessor).open(url). - - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" - - def __init__(self, policy=None): - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies_lock = _threading.RLock() - self._cookies = {} - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - _debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - _debug(" not returning cookie") - continue - _debug(" it's a match") - cookies.append(cookie) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - like ['foo="bar"; $Path="/"', ...] 
- - The $Version attribute is also added when appropriate (currently only - once per request). - - """ - # add cookies in order of most specific (ie. longest) path first - cookies.sort(key=lambda arg: len(arg.path), reverse=True) - - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... - version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib2.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. 
- - """ - _debug("add_cookie_header") - self._cookies_lock.acquire() - try: - - self._policy._now = self._now = int(time.time()) - - cookies = self._cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header( - "Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if (self._policy.rfc2965 and not self._policy.hide_cookie2 and - not request.has_header("Cookie2")): - for cookie in cookies: - if cookie.version != 1: - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - finally: - self._cookies_lock.release() - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). 
- max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if k in standard: - # only first value is significant - continue - if k == "domain": - if v is None: - _debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - _debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - try: - v = int(v) - except ValueError: - _debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age - # is a request to discard (old and new) cookie, though. 
- k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ("port", "comment", "commenturl")): - _debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: - try: - version = int(version) - except ValueError: - return None # invalid version, ignore cookie - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but has 
no value: default to request port. - # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - elif expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(domain, path, name) - except KeyError: - pass - _debug("Expiring cookie, domain='%s', path='%s', name='%s'", - domain, path, name) - return None - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) - if rfc2109_as_ns is None: - rfc2109_as_ns = not self._policy.rfc2965 - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_ns: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object.""" - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.getheaders("Set-Cookie2") - ns_hdrs = headers.getheaders("Set-Cookie") - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and not 
rfc2965) or - (not rfc2965_hdrs and not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except Exception: - _warn_unhandled_exception() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except Exception: - _warn_unhandled_exception() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return key not in lookup - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so.""" - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - - finally: - self._cookies_lock.release() - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set.""" - c = self._cookies - self._cookies_lock.acquire() - try: - if cookie.domain not in c: c[cookie.domain] = {} - c2 = c[cookie.domain] - if cookie.path not in c2: c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - finally: - self._cookies_lock.release() - - def 
extract_cookies(self, response, request): - """Extract cookies from response, where allowable given the request.""" - _debug("extract_cookies: %s", response.info()) - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - for cookie in self.make_cookies(response, request): - if self._policy.set_ok(cookie, request): - _debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - finally: - self._cookies_lock.release() - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. - - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Note that the .save() method won't save session cookies anyway, unless - you ask otherwise by passing a true ignore_discard argument. - - """ - self._cookies_lock.acquire() - try: - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def clear_expired_cookies(self): - """Discard all expired cookies. 
- - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the - .save() method won't save expired cookies anyway (unless you ask - otherwise by passing a true ignore_expires argument). - - """ - self._cookies_lock.acquire() - try: - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def __iter__(self): - return deepvalues(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) - - -# derives from IOError for backwards-compatibility with Python 2.4.0 -class LoadError(IOError): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file.""" - - def __init__(self, filename=None, delayload=False, policy=None): - """ - Cookies are NOT loaded from the named file until either the .load() or - .revert() method is called. 
- - """ - CookieJar.__init__(self, policy) - if filename is not None: - try: - filename+"" - except: - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file.""" - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file.""" - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename) - try: - self._really_load(f, filename, ignore_discard, ignore_expires) - finally: - f.close() - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or IOError) if reversion is not successful; the - object's state will not be altered if this happens. - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - self._cookies_lock.acquire() - try: - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except (LoadError, IOError): - self._cookies = old_state - raise - - finally: - self._cookies_lock.release() - -from _LWPCookieJar import LWPCookieJar, lwp_cookie_str -from _MozillaCookieJar import MozillaCookieJar diff --git a/python/Lib/copy.py b/python/Lib/copy.py deleted file mode 100755 index daf81a3ff8..0000000000 --- a/python/Lib/copy.py +++ /dev/null @@ -1,433 +0,0 @@ -"""Generic (shallow and deep) copying operations. - -Interface summary: - - import copy - - x = copy.copy(y) # make a shallow copy of y - x = copy.deepcopy(y) # make a deep copy of y - -For module specific errors, copy.Error is raised. 
- -The difference between shallow and deep copying is only relevant for -compound objects (objects that contain other objects, like lists or -class instances). - -- A shallow copy constructs a new compound object and then (to the - extent possible) inserts *the same objects* into it that the - original contains. - -- A deep copy constructs a new compound object and then, recursively, - inserts *copies* into it of the objects found in the original. - -Two problems often exist with deep copy operations that don't exist -with shallow copy operations: - - a) recursive objects (compound objects that, directly or indirectly, - contain a reference to themselves) may cause a recursive loop - - b) because deep copy copies *everything* it may copy too much, e.g. - administrative data structures that should be shared even between - copies - -Python's deep copy operation avoids these problems by: - - a) keeping a table of objects already copied during the current - copying pass - - b) letting user-defined classes override the copying operation or the - set of components copied - -This version does not copy types like module, class, function, method, -nor stack trace, stack frame, nor file, socket, window, nor array, nor -any similar types. - -Classes can use the same interfaces to control copying that they use -to control pickling: they can define methods called __getinitargs__(), -__getstate__() and __setstate__(). See the documentation for module -"pickle" for information on these methods. -""" - -import types -import weakref -from copy_reg import dispatch_table - -class Error(Exception): - pass -error = Error # backward compatibility - -try: - from org.python.core import PyStringMap -except ImportError: - PyStringMap = None - -__all__ = ["Error", "copy", "deepcopy"] - -def copy(x): - """Shallow copy operation on arbitrary Python objects. - - See the module's __doc__ string for more info. 
- """ - - cls = type(x) - - copier = _copy_dispatch.get(cls) - if copier: - return copier(x) - - copier = getattr(cls, "__copy__", None) - if copier: - return copier(x) - - reductor = dispatch_table.get(cls) - if reductor: - rv = reductor(x) - else: - reductor = getattr(x, "__reduce_ex__", None) - if reductor: - rv = reductor(2) - else: - reductor = getattr(x, "__reduce__", None) - if reductor: - rv = reductor() - else: - raise Error("un(shallow)copyable object of type %s" % cls) - - return _reconstruct(x, rv, 0) - - -_copy_dispatch = d = {} - -def _copy_immutable(x): - return x -for t in (type(None), int, long, float, bool, str, tuple, - frozenset, type, xrange, types.ClassType, - types.BuiltinFunctionType, type(Ellipsis), - types.FunctionType, weakref.ref): - d[t] = _copy_immutable -for name in ("ComplexType", "UnicodeType", "CodeType"): - t = getattr(types, name, None) - if t is not None: - d[t] = _copy_immutable - -def _copy_with_constructor(x): - return type(x)(x) -for t in (list, dict, set): - d[t] = _copy_with_constructor - -def _copy_with_copy_method(x): - return x.copy() -if PyStringMap is not None: - d[PyStringMap] = _copy_with_copy_method - -def _copy_inst(x): - if hasattr(x, '__copy__'): - return x.__copy__() - if hasattr(x, '__getinitargs__'): - args = x.__getinitargs__() - y = x.__class__(*args) - else: - y = _EmptyClass() - y.__class__ = x.__class__ - if hasattr(x, '__getstate__'): - state = x.__getstate__() - else: - state = x.__dict__ - if hasattr(y, '__setstate__'): - y.__setstate__(state) - else: - y.__dict__.update(state) - return y -d[types.InstanceType] = _copy_inst - -del d - -def deepcopy(x, memo=None, _nil=[]): - """Deep copy operation on arbitrary Python objects. - - See the module's __doc__ string for more info. 
- """ - - if memo is None: - memo = {} - - d = id(x) - y = memo.get(d, _nil) - if y is not _nil: - return y - - cls = type(x) - - copier = _deepcopy_dispatch.get(cls) - if copier: - y = copier(x, memo) - else: - try: - issc = issubclass(cls, type) - except TypeError: # cls is not a class (old Boost; see SF #502085) - issc = 0 - if issc: - y = _deepcopy_atomic(x, memo) - else: - copier = getattr(x, "__deepcopy__", None) - if copier: - y = copier(memo) - else: - reductor = dispatch_table.get(cls) - if reductor: - rv = reductor(x) - else: - reductor = getattr(x, "__reduce_ex__", None) - if reductor: - rv = reductor(2) - else: - reductor = getattr(x, "__reduce__", None) - if reductor: - rv = reductor() - else: - raise Error( - "un(deep)copyable object of type %s" % cls) - y = _reconstruct(x, rv, 1, memo) - - memo[d] = y - _keep_alive(x, memo) # Make sure x lives at least as long as d - return y - -_deepcopy_dispatch = d = {} - -def _deepcopy_atomic(x, memo): - return x -d[type(None)] = _deepcopy_atomic -d[type(Ellipsis)] = _deepcopy_atomic -d[int] = _deepcopy_atomic -d[long] = _deepcopy_atomic -d[float] = _deepcopy_atomic -d[bool] = _deepcopy_atomic -try: - d[complex] = _deepcopy_atomic -except NameError: - pass -d[str] = _deepcopy_atomic -try: - d[unicode] = _deepcopy_atomic -except NameError: - pass -try: - d[types.CodeType] = _deepcopy_atomic -except AttributeError: - pass -d[type] = _deepcopy_atomic -d[xrange] = _deepcopy_atomic -d[types.ClassType] = _deepcopy_atomic -d[types.BuiltinFunctionType] = _deepcopy_atomic -d[types.FunctionType] = _deepcopy_atomic -d[weakref.ref] = _deepcopy_atomic - -def _deepcopy_list(x, memo): - y = [] - memo[id(x)] = y - for a in x: - y.append(deepcopy(a, memo)) - return y -d[list] = _deepcopy_list - -def _deepcopy_tuple(x, memo): - y = [] - for a in x: - y.append(deepcopy(a, memo)) - d = id(x) - try: - return memo[d] - except KeyError: - pass - for i in range(len(x)): - if x[i] is not y[i]: - y = tuple(y) - break - else: - y = x - 
memo[d] = y - return y -d[tuple] = _deepcopy_tuple - -def _deepcopy_dict(x, memo): - y = {} - memo[id(x)] = y - for key, value in x.iteritems(): - y[deepcopy(key, memo)] = deepcopy(value, memo) - return y -d[dict] = _deepcopy_dict -if PyStringMap is not None: - d[PyStringMap] = _deepcopy_dict - -def _deepcopy_method(x, memo): # Copy instance methods - return type(x)(x.im_func, deepcopy(x.im_self, memo), x.im_class) -_deepcopy_dispatch[types.MethodType] = _deepcopy_method - -def _keep_alive(x, memo): - """Keeps a reference to the object x in the memo. - - Because we remember objects by their id, we have - to assure that possibly temporary objects are kept - alive by referencing them. - We store a reference at the id of the memo, which should - normally not be used unless someone tries to deepcopy - the memo itself... - """ - try: - memo[id(memo)].append(x) - except KeyError: - # aha, this is the first one :-) - memo[id(memo)]=[x] - -def _deepcopy_inst(x, memo): - if hasattr(x, '__deepcopy__'): - return x.__deepcopy__(memo) - if hasattr(x, '__getinitargs__'): - args = x.__getinitargs__() - args = deepcopy(args, memo) - y = x.__class__(*args) - else: - y = _EmptyClass() - y.__class__ = x.__class__ - memo[id(x)] = y - if hasattr(x, '__getstate__'): - state = x.__getstate__() - else: - state = x.__dict__ - state = deepcopy(state, memo) - if hasattr(y, '__setstate__'): - y.__setstate__(state) - else: - y.__dict__.update(state) - return y -d[types.InstanceType] = _deepcopy_inst - -def _reconstruct(x, info, deep, memo=None): - if isinstance(info, str): - return x - assert isinstance(info, tuple) - if memo is None: - memo = {} - n = len(info) - assert n in (2, 3, 4, 5) - callable, args = info[:2] - if n > 2: - state = info[2] - else: - state = None - if n > 3: - listiter = info[3] - else: - listiter = None - if n > 4: - dictiter = info[4] - else: - dictiter = None - if deep: - args = deepcopy(args, memo) - y = callable(*args) - memo[id(x)] = y - - if state is not None: - if 
deep: - state = deepcopy(state, memo) - if hasattr(y, '__setstate__'): - y.__setstate__(state) - else: - if isinstance(state, tuple) and len(state) == 2: - state, slotstate = state - else: - slotstate = None - if state is not None: - y.__dict__.update(state) - if slotstate is not None: - for key, value in slotstate.iteritems(): - setattr(y, key, value) - - if listiter is not None: - for item in listiter: - if deep: - item = deepcopy(item, memo) - y.append(item) - if dictiter is not None: - for key, value in dictiter: - if deep: - key = deepcopy(key, memo) - value = deepcopy(value, memo) - y[key] = value - return y - -del d - -del types - -# Helper for instance creation without calling __init__ -class _EmptyClass: - pass - -def _test(): - l = [None, 1, 2L, 3.14, 'xyzzy', (1, 2L), [3.14, 'abc'], - {'abc': 'ABC'}, (), [], {}] - l1 = copy(l) - print l1==l - l1 = map(copy, l) - print l1==l - l1 = deepcopy(l) - print l1==l - class C: - def __init__(self, arg=None): - self.a = 1 - self.arg = arg - if __name__ == '__main__': - import sys - file = sys.argv[0] - else: - file = __file__ - self.fp = open(file) - self.fp.close() - def __getstate__(self): - return {'a': self.a, 'arg': self.arg} - def __setstate__(self, state): - for key, value in state.iteritems(): - setattr(self, key, value) - def __deepcopy__(self, memo=None): - new = self.__class__(deepcopy(self.arg, memo)) - new.a = self.a - return new - c = C('argument sketch') - l.append(c) - l2 = copy(l) - print l == l2 - print l - print l2 - l2 = deepcopy(l) - print l == l2 - print l - print l2 - l.append({l[1]: l, 'xyz': l[2]}) - l3 = copy(l) - import repr - print map(repr.repr, l) - print map(repr.repr, l1) - print map(repr.repr, l2) - print map(repr.repr, l3) - l3 = deepcopy(l) - import repr - print map(repr.repr, l) - print map(repr.repr, l1) - print map(repr.repr, l2) - print map(repr.repr, l3) - class odict(dict): - def __init__(self, d = {}): - self.a = 99 - dict.__init__(self, d) - def __setitem__(self, k, i): - 
dict.__setitem__(self, k, i) - self.a - o = odict({"A" : "B"}) - x = deepcopy(o) - print(o, x) - -if __name__ == '__main__': - _test() diff --git a/python/Lib/copy_reg.py b/python/Lib/copy_reg.py deleted file mode 100755 index db1715092c..0000000000 --- a/python/Lib/copy_reg.py +++ /dev/null @@ -1,201 +0,0 @@ -"""Helper to provide extensibility for pickle/cPickle. - -This is only useful to add pickle support for extension types defined in -C, not for instances of user-defined classes. -""" - -from types import ClassType as _ClassType - -__all__ = ["pickle", "constructor", - "add_extension", "remove_extension", "clear_extension_cache"] - -dispatch_table = {} - -def pickle(ob_type, pickle_function, constructor_ob=None): - if type(ob_type) is _ClassType: - raise TypeError("copy_reg is not intended for use with classes") - - if not hasattr(pickle_function, '__call__'): - raise TypeError("reduction functions must be callable") - dispatch_table[ob_type] = pickle_function - - # The constructor_ob function is a vestige of safe for unpickling. - # There is no reason for the caller to pass it anymore. - if constructor_ob is not None: - constructor(constructor_ob) - -def constructor(object): - if not hasattr(object, '__call__'): - raise TypeError("constructors must be callable") - -# Example: provide pickling support for complex numbers. 
- -try: - complex -except NameError: - pass -else: - - def pickle_complex(c): - return complex, (c.real, c.imag) - - pickle(complex, pickle_complex, complex) - -# Support for pickling new-style objects - -def _reconstructor(cls, base, state): - if base is object: - obj = object.__new__(cls) - else: - obj = base.__new__(cls, state) - if base.__init__ != object.__init__: - base.__init__(obj, state) - return obj - -_HEAPTYPE = 1<<9 - -# Python code for object.__reduce_ex__ for protocols 0 and 1 - -def _reduce_ex(self, proto): - assert proto < 2 - for base in self.__class__.__mro__: - if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE: - break - else: - base = object # not really reachable - if base is object: - state = None - else: - if base is self.__class__: - raise TypeError, "can't pickle %s objects" % base.__name__ - state = base(self) - args = (self.__class__, base, state) - try: - getstate = self.__getstate__ - except AttributeError: - if getattr(self, "__slots__", None): - raise TypeError("a class that defines __slots__ without " - "defining __getstate__ cannot be pickled") - try: - dict = self.__dict__ - except AttributeError: - dict = None - else: - dict = getstate() - if dict: - return _reconstructor, args, dict - else: - return _reconstructor, args - -# Helper for __reduce_ex__ protocol 2 - -def __newobj__(cls, *args): - return cls.__new__(cls, *args) - -def _slotnames(cls): - """Return a list of slot names for a given class. - - This needs to find slots defined by the class and its bases, so we - can't simply return the __slots__ attribute. We must walk down - the Method Resolution Order and concatenate the __slots__ of each - class found there. (This assumes classes don't modify their - __slots__ attribute to misrepresent their slots after the class is - defined.) 
- """ - - # Get the value from a cache in the class if possible - names = cls.__dict__.get("__slotnames__") - if names is not None: - return names - - # Not cached -- calculate the value - names = [] - if not hasattr(cls, "__slots__"): - # This class has no slots - pass - else: - # Slots found -- gather slot names from all base classes - for c in cls.__mro__: - if "__slots__" in c.__dict__: - slots = c.__dict__['__slots__'] - # if class has a single slot, it can be given as a string - if isinstance(slots, basestring): - slots = (slots,) - for name in slots: - # special descriptors - if name in ("__dict__", "__weakref__"): - continue - # mangled names - elif name.startswith('__') and not name.endswith('__'): - names.append('_%s%s' % (c.__name__, name)) - else: - names.append(name) - - # Cache the outcome in the class if at all possible - try: - cls.__slotnames__ = names - except: - pass # But don't die if we can't - - return names - -# A registry of extension codes. This is an ad-hoc compression -# mechanism. Whenever a global reference to , is about -# to be pickled, the (, ) tuple is looked up here to see -# if it is a registered extension code for it. Extension codes are -# universal, so that the meaning of a pickle does not depend on -# context. (There are also some codes reserved for local use that -# don't have this restriction.) Codes are positive ints; 0 is -# reserved. - -_extension_registry = {} # key -> code -_inverted_registry = {} # code -> key -_extension_cache = {} # code -> object -# Don't ever rebind those names: cPickle grabs a reference to them when -# it's initialized, and won't see a rebinding. 
- -def add_extension(module, name, code): - """Register an extension code.""" - code = int(code) - if not 1 <= code <= 0x7fffffff: - raise ValueError, "code out of range" - key = (module, name) - if (_extension_registry.get(key) == code and - _inverted_registry.get(code) == key): - return # Redundant registrations are benign - if key in _extension_registry: - raise ValueError("key %s is already registered with code %s" % - (key, _extension_registry[key])) - if code in _inverted_registry: - raise ValueError("code %s is already in use for key %s" % - (code, _inverted_registry[code])) - _extension_registry[key] = code - _inverted_registry[code] = key - -def remove_extension(module, name, code): - """Unregister an extension code. For testing only.""" - key = (module, name) - if (_extension_registry.get(key) != code or - _inverted_registry.get(code) != key): - raise ValueError("key %s is not registered with code %s" % - (key, code)) - del _extension_registry[key] - del _inverted_registry[code] - if code in _extension_cache: - del _extension_cache[code] - -def clear_extension_cache(): - _extension_cache.clear() - -# Standard extension code assignments - -# Reserved ranges - -# First Last Count Purpose -# 1 127 127 Reserved for Python standard library -# 128 191 64 Reserved for Zope -# 192 239 48 Reserved for 3rd parties -# 240 255 16 Reserved for private use (will never be assigned) -# 256 Inf Inf Reserved for future assignment - -# Extension codes are assigned by the Python Software Foundation. 
diff --git a/python/Lib/csv.py b/python/Lib/csv.py deleted file mode 100755 index c155ada794..0000000000 --- a/python/Lib/csv.py +++ /dev/null @@ -1,456 +0,0 @@ - -""" -csv.py - read/write/investigate CSV files -""" - -import re -from functools import reduce -from _csv import Error, __version__, writer, reader, register_dialect, \ - unregister_dialect, get_dialect, list_dialects, \ - field_size_limit, \ - QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ - __doc__ -from _csv import Dialect as _Dialect - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", - "Error", "Dialect", "__doc__", "excel", "excel_tab", - "field_size_limit", "reader", "writer", - "register_dialect", "get_dialect", "list_dialects", "Sniffer", - "unregister_dialect", "__version__", "DictReader", "DictWriter" ] - -class Dialect: - """Describe an Excel dialect. - - This must be subclassed (see csv.excel). Valid attributes are: - delimiter, quotechar, escapechar, doublequote, skipinitialspace, - lineterminator, quoting. 
- - """ - _name = "" - _valid = False - # placeholders - delimiter = None - quotechar = None - escapechar = None - doublequote = None - skipinitialspace = None - lineterminator = None - quoting = None - - def __init__(self): - if self.__class__ != Dialect: - self._valid = True - self._validate() - - def _validate(self): - try: - _Dialect(self) - except TypeError, e: - # We do this for compatibility with py2.3 - raise Error(str(e)) - -class excel(Dialect): - """Describe the usual properties of Excel-generated CSV files.""" - delimiter = ',' - quotechar = '"' - doublequote = True - skipinitialspace = False - lineterminator = '\r\n' - quoting = QUOTE_MINIMAL -register_dialect("excel", excel) - -class excel_tab(excel): - """Describe the usual properties of Excel-generated TAB-delimited files.""" - delimiter = '\t' -register_dialect("excel-tab", excel_tab) - - -class DictReader: - def __init__(self, f, fieldnames=None, restkey=None, restval=None, - dialect="excel", *args, **kwds): - self._fieldnames = fieldnames # list of keys for the dict - self.restkey = restkey # key to catch long rows - self.restval = restval # default value for short rows - self.reader = reader(f, dialect, *args, **kwds) - self.dialect = dialect - self.line_num = 0 - - def __iter__(self): - return self - - @property - def fieldnames(self): - if self._fieldnames is None: - try: - self._fieldnames = self.reader.next() - except StopIteration: - pass - self.line_num = self.reader.line_num - return self._fieldnames - - # Issue 20004: Because DictReader is a classic class, this setter is - # ignored. At this point in 2.7's lifecycle, it is too late to change the - # base class for fear of breaking working code. If you want to change - # fieldnames without overwriting the getter, set _fieldnames directly. - @fieldnames.setter - def fieldnames(self, value): - self._fieldnames = value - - def next(self): - if self.line_num == 0: - # Used only for its side effect. 
- self.fieldnames - row = self.reader.next() - self.line_num = self.reader.line_num - - # unlike the basic reader, we prefer not to return blanks, - # because we will typically wind up with a dict full of None - # values - while row == []: - row = self.reader.next() - d = dict(zip(self.fieldnames, row)) - lf = len(self.fieldnames) - lr = len(row) - if lf < lr: - d[self.restkey] = row[lf:] - elif lf > lr: - for key in self.fieldnames[lr:]: - d[key] = self.restval - return d - - -class DictWriter: - def __init__(self, f, fieldnames, restval="", extrasaction="raise", - dialect="excel", *args, **kwds): - self.fieldnames = fieldnames # list of keys for the dict - self.restval = restval # for writing short dicts - if extrasaction.lower() not in ("raise", "ignore"): - raise ValueError, \ - ("extrasaction (%s) must be 'raise' or 'ignore'" % - extrasaction) - self.extrasaction = extrasaction - self.writer = writer(f, dialect, *args, **kwds) - - def writeheader(self): - header = dict(zip(self.fieldnames, self.fieldnames)) - self.writerow(header) - - def _dict_to_list(self, rowdict): - if self.extrasaction == "raise": - wrong_fields = [k for k in rowdict if k not in self.fieldnames] - if wrong_fields: - raise ValueError("dict contains fields not in fieldnames: " - + ", ".join([repr(x) for x in wrong_fields])) - return [rowdict.get(key, self.restval) for key in self.fieldnames] - - def writerow(self, rowdict): - return self.writer.writerow(self._dict_to_list(rowdict)) - - def writerows(self, rowdicts): - rows = [] - for rowdict in rowdicts: - rows.append(self._dict_to_list(rowdict)) - return self.writer.writerows(rows) - -# Guard Sniffer's type checking against builds that exclude complex() -try: - complex -except NameError: - complex = float - -class Sniffer: - ''' - "Sniffs" the format of a CSV file (i.e. delimiter, quotechar) - Returns a Dialect object. 
- ''' - def __init__(self): - # in case there is more than one possible delimiter - self.preferred = [',', '\t', ';', ' ', ':'] - - - def sniff(self, sample, delimiters=None): - """ - Returns a dialect (or None) corresponding to the sample - """ - - quotechar, doublequote, delimiter, skipinitialspace = \ - self._guess_quote_and_delimiter(sample, delimiters) - if not delimiter: - delimiter, skipinitialspace = self._guess_delimiter(sample, - delimiters) - - if not delimiter: - raise Error, "Could not determine delimiter" - - class dialect(Dialect): - _name = "sniffed" - lineterminator = '\r\n' - quoting = QUOTE_MINIMAL - # escapechar = '' - - dialect.doublequote = doublequote - dialect.delimiter = delimiter - # _csv.reader won't accept a quotechar of '' - dialect.quotechar = quotechar or '"' - dialect.skipinitialspace = skipinitialspace - - return dialect - - - def _guess_quote_and_delimiter(self, data, delimiters): - """ - Looks for text enclosed between two identical quotes - (the probable quotechar) which are preceded and followed - by the same character (the probable delimiter). - For example: - ,'some text', - The quote with the most wins, same with the delimiter. - If there is no quotechar the delimiter can't be determined - this way. - """ - - matches = [] - for restr in ('(?P[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?P=delim)', # ,".*?", - '(?:^|\n)(?P["\']).*?(?P=quote)(?P[^\w\n"\'])(?P ?)', # ".*?", - '(?P>[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?:$|\n)', # ,".*?" - '(?:^|\n)(?P["\']).*?(?P=quote)(?:$|\n)'): # ".*?" 
(no delim, no space) - regexp = re.compile(restr, re.DOTALL | re.MULTILINE) - matches = regexp.findall(data) - if matches: - break - - if not matches: - # (quotechar, doublequote, delimiter, skipinitialspace) - return ('', False, None, 0) - quotes = {} - delims = {} - spaces = 0 - for m in matches: - n = regexp.groupindex['quote'] - 1 - key = m[n] - if key: - quotes[key] = quotes.get(key, 0) + 1 - try: - n = regexp.groupindex['delim'] - 1 - key = m[n] - except KeyError: - continue - if key and (delimiters is None or key in delimiters): - delims[key] = delims.get(key, 0) + 1 - try: - n = regexp.groupindex['space'] - 1 - except KeyError: - continue - if m[n]: - spaces += 1 - - quotechar = reduce(lambda a, b, quotes = quotes: - (quotes[a] > quotes[b]) and a or b, quotes.keys()) - - if delims: - delim = reduce(lambda a, b, delims = delims: - (delims[a] > delims[b]) and a or b, delims.keys()) - skipinitialspace = delims[delim] == spaces - if delim == '\n': # most likely a file with a single column - delim = '' - else: - # there is *no* delimiter, it's a single column of quoted data - delim = '' - skipinitialspace = 0 - - # if we see an extra quote between delimiters, we've got a - # double quoted format - dq_regexp = re.compile( - r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \ - {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE) - - - - if dq_regexp.search(data): - doublequote = True - else: - doublequote = False - - return (quotechar, doublequote, delim, skipinitialspace) - - - def _guess_delimiter(self, data, delimiters): - """ - The delimiter /should/ occur the same number of times on - each row. However, due to malformed data, it may not. We don't want - an all or nothing approach, so we allow for small variations in this - number. - 1) build a table of the frequency of each character on every line. - 2) build a table of frequencies of this frequency (meta-frequency?), - e.g. 
'x occurred 5 times in 10 rows, 6 times in 1000 rows, - 7 times in 2 rows' - 3) use the mode of the meta-frequency to determine the /expected/ - frequency for that character - 4) find out how often the character actually meets that goal - 5) the character that best meets its goal is the delimiter - For performance reasons, the data is evaluated in chunks, so it can - try and evaluate the smallest portion of the data possible, evaluating - additional chunks as necessary. - """ - - data = filter(None, data.split('\n')) - - ascii = [chr(c) for c in range(127)] # 7-bit ASCII - - # build frequency tables - chunkLength = min(10, len(data)) - iteration = 0 - charFrequency = {} - modes = {} - delims = {} - start, end = 0, min(chunkLength, len(data)) - while start < len(data): - iteration += 1 - for line in data[start:end]: - for char in ascii: - metaFrequency = charFrequency.get(char, {}) - # must count even if frequency is 0 - freq = line.count(char) - # value is the mode - metaFrequency[freq] = metaFrequency.get(freq, 0) + 1 - charFrequency[char] = metaFrequency - - for char in charFrequency.keys(): - items = charFrequency[char].items() - if len(items) == 1 and items[0][0] == 0: - continue - # get the mode of the frequencies - if len(items) > 1: - modes[char] = reduce(lambda a, b: a[1] > b[1] and a or b, - items) - # adjust the mode - subtract the sum of all - # other frequencies - items.remove(modes[char]) - modes[char] = (modes[char][0], modes[char][1] - - reduce(lambda a, b: (0, a[1] + b[1]), - items)[1]) - else: - modes[char] = items[0] - - # build a list of possible delimiters - modeList = modes.items() - total = float(chunkLength * iteration) - # (rows of consistent data) / (number of rows) = 100% - consistency = 1.0 - # minimum consistency threshold - threshold = 0.9 - while len(delims) == 0 and consistency >= threshold: - for k, v in modeList: - if v[0] > 0 and v[1] > 0: - if ((v[1]/total) >= consistency and - (delimiters is None or k in delimiters)): - delims[k] 
= v - consistency -= 0.01 - - if len(delims) == 1: - delim = delims.keys()[0] - skipinitialspace = (data[0].count(delim) == - data[0].count("%c " % delim)) - return (delim, skipinitialspace) - - # analyze another chunkLength lines - start = end - end += chunkLength - - if not delims: - return ('', 0) - - # if there's more than one, fall back to a 'preferred' list - if len(delims) > 1: - for d in self.preferred: - if d in delims.keys(): - skipinitialspace = (data[0].count(d) == - data[0].count("%c " % d)) - return (d, skipinitialspace) - - # nothing else indicates a preference, pick the character that - # dominates(?) - items = [(v,k) for (k,v) in delims.items()] - items.sort() - delim = items[-1][1] - - skipinitialspace = (data[0].count(delim) == - data[0].count("%c " % delim)) - return (delim, skipinitialspace) - - - def has_header(self, sample): - # Creates a dictionary of types of data in each column. If any - # column is of a single type (say, integers), *except* for the first - # row, then the first row is presumed to be labels. If the type - # can't be determined, it is assumed to be a string in which case - # the length of the string is the determining factor: if all of the - # rows except for the first are the same length, it's a header. - # Finally, a 'vote' is taken at the end for each column, adding or - # subtracting from the likelihood of the first row being a header. 
- - rdr = reader(StringIO(sample), self.sniff(sample)) - - header = rdr.next() # assume first row is header - - columns = len(header) - columnTypes = {} - for i in range(columns): columnTypes[i] = None - - checked = 0 - for row in rdr: - # arbitrary number of rows to check, to keep it sane - if checked > 20: - break - checked += 1 - - if len(row) != columns: - continue # skip rows that have irregular number of columns - - for col in columnTypes.keys(): - - for thisType in [int, long, float, complex]: - try: - thisType(row[col]) - break - except (ValueError, OverflowError): - pass - else: - # fallback to length of string - thisType = len(row[col]) - - # treat longs as ints - if thisType == long: - thisType = int - - if thisType != columnTypes[col]: - if columnTypes[col] is None: # add new column type - columnTypes[col] = thisType - else: - # type is inconsistent, remove column from - # consideration - del columnTypes[col] - - # finally, compare results against first row and "vote" - # on whether it's a header - hasHeader = 0 - for col, colType in columnTypes.items(): - if type(colType) == type(0): # it's a length - if len(header[col]) != colType: - hasHeader += 1 - else: - hasHeader -= 1 - else: # attempt typecast - try: - colType(header[col]) - except (ValueError, TypeError): - hasHeader += 1 - else: - hasHeader -= 1 - - return hasHeader > 0 diff --git a/python/Lib/ctypes/__init__.py b/python/Lib/ctypes/__init__.py deleted file mode 100755 index e24cfd2bed..0000000000 --- a/python/Lib/ctypes/__init__.py +++ /dev/null @@ -1,552 +0,0 @@ -"""create and manipulate C data types in Python""" - -import os as _os, sys as _sys - -__version__ = "1.1.0" - -from _ctypes import Union, Structure, Array -from _ctypes import _Pointer -from _ctypes import CFuncPtr as _CFuncPtr -from _ctypes import __version__ as _ctypes_version -from _ctypes import RTLD_LOCAL, RTLD_GLOBAL -from _ctypes import ArgumentError - -from struct import calcsize as _calcsize - -if __version__ != 
_ctypes_version: - raise Exception("Version number mismatch", __version__, _ctypes_version) - -if _os.name in ("nt", "ce"): - from _ctypes import FormatError - -DEFAULT_MODE = RTLD_LOCAL -if _os.name == "posix" and _sys.platform == "darwin": - # On OS X 10.3, we use RTLD_GLOBAL as default mode - # because RTLD_LOCAL does not work at least on some - # libraries. OS X 10.3 is Darwin 7, so we check for - # that. - - if int(_os.uname()[2].split('.')[0]) < 8: - DEFAULT_MODE = RTLD_GLOBAL - -from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \ - FUNCFLAG_PYTHONAPI as _FUNCFLAG_PYTHONAPI, \ - FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \ - FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR - -""" -WINOLEAPI -> HRESULT -WINOLEAPI_(type) - -STDMETHODCALLTYPE - -STDMETHOD(name) -STDMETHOD_(type, name) - -STDAPICALLTYPE -""" - -def create_string_buffer(init, size=None): - """create_string_buffer(aString) -> character array - create_string_buffer(anInteger) -> character array - create_string_buffer(aString, anInteger) -> character array - """ - if isinstance(init, (str, unicode)): - if size is None: - size = len(init)+1 - buftype = c_char * size - buf = buftype() - buf.value = init - return buf - elif isinstance(init, (int, long)): - buftype = c_char * init - buf = buftype() - return buf - raise TypeError(init) - -def c_buffer(init, size=None): -## "deprecated, use create_string_buffer instead" -## import warnings -## warnings.warn("c_buffer is deprecated, use create_string_buffer instead", -## DeprecationWarning, stacklevel=2) - return create_string_buffer(init, size) - -_c_functype_cache = {} -def CFUNCTYPE(restype, *argtypes, **kw): - """CFUNCTYPE(restype, *argtypes, - use_errno=False, use_last_error=False) -> function prototype. 
- - restype: the result type - argtypes: a sequence specifying the argument types - - The function prototype can be called in different ways to create a - callable object: - - prototype(integer address) -> foreign function - prototype(callable) -> create and return a C callable function from callable - prototype(integer index, method name[, paramflags]) -> foreign function calling a COM method - prototype((ordinal number, dll object)[, paramflags]) -> foreign function exported by ordinal - prototype((function name, dll object)[, paramflags]) -> foreign function exported by name - """ - flags = _FUNCFLAG_CDECL - if kw.pop("use_errno", False): - flags |= _FUNCFLAG_USE_ERRNO - if kw.pop("use_last_error", False): - flags |= _FUNCFLAG_USE_LASTERROR - if kw: - raise ValueError("unexpected keyword argument(s) %s" % kw.keys()) - try: - return _c_functype_cache[(restype, argtypes, flags)] - except KeyError: - class CFunctionType(_CFuncPtr): - _argtypes_ = argtypes - _restype_ = restype - _flags_ = flags - _c_functype_cache[(restype, argtypes, flags)] = CFunctionType - return CFunctionType - -if _os.name in ("nt", "ce"): - from _ctypes import LoadLibrary as _dlopen - from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL - if _os.name == "ce": - # 'ce' doesn't have the stdcall calling convention - _FUNCFLAG_STDCALL = _FUNCFLAG_CDECL - - _win_functype_cache = {} - def WINFUNCTYPE(restype, *argtypes, **kw): - # docstring set later (very similar to CFUNCTYPE.__doc__) - flags = _FUNCFLAG_STDCALL - if kw.pop("use_errno", False): - flags |= _FUNCFLAG_USE_ERRNO - if kw.pop("use_last_error", False): - flags |= _FUNCFLAG_USE_LASTERROR - if kw: - raise ValueError("unexpected keyword argument(s) %s" % kw.keys()) - try: - return _win_functype_cache[(restype, argtypes, flags)] - except KeyError: - class WinFunctionType(_CFuncPtr): - _argtypes_ = argtypes - _restype_ = restype - _flags_ = flags - _win_functype_cache[(restype, argtypes, flags)] = WinFunctionType - return WinFunctionType 
- if WINFUNCTYPE.__doc__: - WINFUNCTYPE.__doc__ = CFUNCTYPE.__doc__.replace("CFUNCTYPE", "WINFUNCTYPE") - -elif _os.name == "posix": - from _ctypes import dlopen as _dlopen - -from _ctypes import sizeof, byref, addressof, alignment, resize -from _ctypes import get_errno, set_errno -from _ctypes import _SimpleCData - -def _check_size(typ, typecode=None): - # Check if sizeof(ctypes_type) against struct.calcsize. This - # should protect somewhat against a misconfigured libffi. - from struct import calcsize - if typecode is None: - # Most _type_ codes are the same as used in struct - typecode = typ._type_ - actual, required = sizeof(typ), calcsize(typecode) - if actual != required: - raise SystemError("sizeof(%s) wrong: %d instead of %d" % \ - (typ, actual, required)) - -class py_object(_SimpleCData): - _type_ = "O" - def __repr__(self): - try: - return super(py_object, self).__repr__() - except ValueError: - return "%s()" % type(self).__name__ -_check_size(py_object, "P") - -class c_short(_SimpleCData): - _type_ = "h" -_check_size(c_short) - -class c_ushort(_SimpleCData): - _type_ = "H" -_check_size(c_ushort) - -class c_long(_SimpleCData): - _type_ = "l" -_check_size(c_long) - -class c_ulong(_SimpleCData): - _type_ = "L" -_check_size(c_ulong) - -if _calcsize("i") == _calcsize("l"): - # if int and long have the same size, make c_int an alias for c_long - c_int = c_long - c_uint = c_ulong -else: - class c_int(_SimpleCData): - _type_ = "i" - _check_size(c_int) - - class c_uint(_SimpleCData): - _type_ = "I" - _check_size(c_uint) - -class c_float(_SimpleCData): - _type_ = "f" -_check_size(c_float) - -class c_double(_SimpleCData): - _type_ = "d" -_check_size(c_double) - -class c_longdouble(_SimpleCData): - _type_ = "g" -if sizeof(c_longdouble) == sizeof(c_double): - c_longdouble = c_double - -if _calcsize("l") == _calcsize("q"): - # if long and long long have the same size, make c_longlong an alias for c_long - c_longlong = c_long - c_ulonglong = c_ulong -else: - class 
c_longlong(_SimpleCData): - _type_ = "q" - _check_size(c_longlong) - - class c_ulonglong(_SimpleCData): - _type_ = "Q" - ## def from_param(cls, val): - ## return ('d', float(val), val) - ## from_param = classmethod(from_param) - _check_size(c_ulonglong) - -class c_ubyte(_SimpleCData): - _type_ = "B" -c_ubyte.__ctype_le__ = c_ubyte.__ctype_be__ = c_ubyte -# backward compatibility: -##c_uchar = c_ubyte -_check_size(c_ubyte) - -class c_byte(_SimpleCData): - _type_ = "b" -c_byte.__ctype_le__ = c_byte.__ctype_be__ = c_byte -_check_size(c_byte) - -class c_char(_SimpleCData): - _type_ = "c" -c_char.__ctype_le__ = c_char.__ctype_be__ = c_char -_check_size(c_char) - -class c_char_p(_SimpleCData): - _type_ = "z" - if _os.name == "nt": - def __repr__(self): - if not windll.kernel32.IsBadStringPtrA(self, -1): - return "%s(%r)" % (self.__class__.__name__, self.value) - return "%s(%s)" % (self.__class__.__name__, cast(self, c_void_p).value) - else: - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, cast(self, c_void_p).value) -_check_size(c_char_p, "P") - -class c_void_p(_SimpleCData): - _type_ = "P" -c_voidp = c_void_p # backwards compatibility (to a bug) -_check_size(c_void_p) - -class c_bool(_SimpleCData): - _type_ = "?" - -from _ctypes import POINTER, pointer, _pointer_type_cache - -def _reset_cache(): - _pointer_type_cache.clear() - _c_functype_cache.clear() - if _os.name in ("nt", "ce"): - _win_functype_cache.clear() - # _SimpleCData.c_wchar_p_from_param - POINTER(c_wchar).from_param = c_wchar_p.from_param - # _SimpleCData.c_char_p_from_param - POINTER(c_char).from_param = c_char_p.from_param - _pointer_type_cache[None] = c_void_p - # XXX for whatever reasons, creating the first instance of a callback - # function is needed for the unittests on Win64 to succeed. This MAY - # be a compiler bug, since the problem occurs only when _ctypes is - # compiled with the MS SDK compiler. Or an uninitialized variable? 
- CFUNCTYPE(c_int)(lambda: None) - -try: - from _ctypes import set_conversion_mode -except ImportError: - pass -else: - if _os.name in ("nt", "ce"): - set_conversion_mode("mbcs", "ignore") - else: - set_conversion_mode("ascii", "strict") - - class c_wchar_p(_SimpleCData): - _type_ = "Z" - - class c_wchar(_SimpleCData): - _type_ = "u" - - def create_unicode_buffer(init, size=None): - """create_unicode_buffer(aString) -> character array - create_unicode_buffer(anInteger) -> character array - create_unicode_buffer(aString, anInteger) -> character array - """ - if isinstance(init, (str, unicode)): - if size is None: - size = len(init)+1 - buftype = c_wchar * size - buf = buftype() - buf.value = init - return buf - elif isinstance(init, (int, long)): - buftype = c_wchar * init - buf = buftype() - return buf - raise TypeError(init) - -# XXX Deprecated -def SetPointerType(pointer, cls): - if _pointer_type_cache.get(cls, None) is not None: - raise RuntimeError("This type already exists in the cache") - if id(pointer) not in _pointer_type_cache: - raise RuntimeError("What's this???") - pointer.set_type(cls) - _pointer_type_cache[cls] = pointer - del _pointer_type_cache[id(pointer)] - -# XXX Deprecated -def ARRAY(typ, len): - return typ * len - -################################################################ - - -class CDLL(object): - """An instance of this class represents a loaded dll/shared - library, exporting functions using the standard C calling - convention (named 'cdecl' on Windows). - - The exported functions can be accessed as attributes, or by - indexing with the function name. Examples: - - .qsort -> callable object - ['qsort'] -> callable object - - Calling the functions releases the Python GIL during the call and - reacquires it afterwards. 
- """ - _func_flags_ = _FUNCFLAG_CDECL - _func_restype_ = c_int - - def __init__(self, name, mode=DEFAULT_MODE, handle=None, - use_errno=False, - use_last_error=False): - self._name = name - flags = self._func_flags_ - if use_errno: - flags |= _FUNCFLAG_USE_ERRNO - if use_last_error: - flags |= _FUNCFLAG_USE_LASTERROR - - class _FuncPtr(_CFuncPtr): - _flags_ = flags - _restype_ = self._func_restype_ - self._FuncPtr = _FuncPtr - - if handle is None: - self._handle = _dlopen(self._name, mode) - else: - self._handle = handle - - def __repr__(self): - return "<%s '%s', handle %x at %x>" % \ - (self.__class__.__name__, self._name, - (self._handle & (_sys.maxint*2 + 1)), - id(self) & (_sys.maxint*2 + 1)) - - def __getattr__(self, name): - if name.startswith('__') and name.endswith('__'): - raise AttributeError(name) - func = self.__getitem__(name) - setattr(self, name, func) - return func - - def __getitem__(self, name_or_ordinal): - func = self._FuncPtr((name_or_ordinal, self)) - if not isinstance(name_or_ordinal, (int, long)): - func.__name__ = name_or_ordinal - return func - -class PyDLL(CDLL): - """This class represents the Python library itself. It allows - accessing Python API functions. The GIL is not released, and - Python exceptions are handled correctly. - """ - _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI - -if _os.name in ("nt", "ce"): - - class WinDLL(CDLL): - """This class represents a dll exporting functions using the - Windows stdcall calling convention. - """ - _func_flags_ = _FUNCFLAG_STDCALL - - # XXX Hm, what about HRESULT as normal parameter? - # Mustn't it derive from c_long then? - from _ctypes import _check_HRESULT, _SimpleCData - class HRESULT(_SimpleCData): - _type_ = "l" - # _check_retval_ is called with the function's result when it - # is used as restype. It checks for the FAILED bit, and - # raises a WindowsError if it is set. 
- # - # The _check_retval_ method is implemented in C, so that the - # method definition itself is not included in the traceback - # when it raises an error - that is what we want (and Python - # doesn't have a way to raise an exception in the caller's - # frame). - _check_retval_ = _check_HRESULT - - class OleDLL(CDLL): - """This class represents a dll exporting functions using the - Windows stdcall calling convention, and returning HRESULT. - HRESULT error values are automatically raised as WindowsError - exceptions. - """ - _func_flags_ = _FUNCFLAG_STDCALL - _func_restype_ = HRESULT - -class LibraryLoader(object): - def __init__(self, dlltype): - self._dlltype = dlltype - - def __getattr__(self, name): - if name[0] == '_': - raise AttributeError(name) - dll = self._dlltype(name) - setattr(self, name, dll) - return dll - - def __getitem__(self, name): - return getattr(self, name) - - def LoadLibrary(self, name): - return self._dlltype(name) - -cdll = LibraryLoader(CDLL) -pydll = LibraryLoader(PyDLL) - -if _os.name in ("nt", "ce"): - pythonapi = PyDLL("python dll", None, _sys.dllhandle) -elif _sys.platform == "cygwin": - pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2]) -else: - pythonapi = PyDLL(None) - - -if _os.name in ("nt", "ce"): - windll = LibraryLoader(WinDLL) - oledll = LibraryLoader(OleDLL) - - if _os.name == "nt": - GetLastError = windll.kernel32.GetLastError - else: - GetLastError = windll.coredll.GetLastError - from _ctypes import get_last_error, set_last_error - - def WinError(code=None, descr=None): - if code is None: - code = GetLastError() - if descr is None: - descr = FormatError(code).strip() - return WindowsError(code, descr) - -if sizeof(c_uint) == sizeof(c_void_p): - c_size_t = c_uint - c_ssize_t = c_int -elif sizeof(c_ulong) == sizeof(c_void_p): - c_size_t = c_ulong - c_ssize_t = c_long -elif sizeof(c_ulonglong) == sizeof(c_void_p): - c_size_t = c_ulonglong - c_ssize_t = c_longlong - -# functions - -from _ctypes import 
_memmove_addr, _memset_addr, _string_at_addr, _cast_addr - -## void *memmove(void *, const void *, size_t); -memmove = CFUNCTYPE(c_void_p, c_void_p, c_void_p, c_size_t)(_memmove_addr) - -## void *memset(void *, int, size_t) -memset = CFUNCTYPE(c_void_p, c_void_p, c_int, c_size_t)(_memset_addr) - -def PYFUNCTYPE(restype, *argtypes): - class CFunctionType(_CFuncPtr): - _argtypes_ = argtypes - _restype_ = restype - _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI - return CFunctionType - -_cast = PYFUNCTYPE(py_object, c_void_p, py_object, py_object)(_cast_addr) -def cast(obj, typ): - return _cast(obj, obj, typ) - -_string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr) -def string_at(ptr, size=-1): - """string_at(addr[, size]) -> string - - Return the string at addr.""" - return _string_at(ptr, size) - -try: - from _ctypes import _wstring_at_addr -except ImportError: - pass -else: - _wstring_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_wstring_at_addr) - def wstring_at(ptr, size=-1): - """wstring_at(addr[, size]) -> string - - Return the string at addr.""" - return _wstring_at(ptr, size) - - -if _os.name in ("nt", "ce"): # COM stuff - def DllGetClassObject(rclsid, riid, ppv): - try: - ccom = __import__("comtypes.server.inprocserver", globals(), locals(), ['*']) - except ImportError: - return -2147221231 # CLASS_E_CLASSNOTAVAILABLE - else: - return ccom.DllGetClassObject(rclsid, riid, ppv) - - def DllCanUnloadNow(): - try: - ccom = __import__("comtypes.server.inprocserver", globals(), locals(), ['*']) - except ImportError: - return 0 # S_OK - return ccom.DllCanUnloadNow() - -from ctypes._endian import BigEndianStructure, LittleEndianStructure - -# Fill in specifically-sized types -c_int8 = c_byte -c_uint8 = c_ubyte -for kind in [c_short, c_int, c_long, c_longlong]: - if sizeof(kind) == 2: c_int16 = kind - elif sizeof(kind) == 4: c_int32 = kind - elif sizeof(kind) == 8: c_int64 = kind -for kind in [c_ushort, c_uint, c_ulong, c_ulonglong]: - if sizeof(kind) 
== 2: c_uint16 = kind - elif sizeof(kind) == 4: c_uint32 = kind - elif sizeof(kind) == 8: c_uint64 = kind -del(kind) - -_reset_cache() diff --git a/python/Lib/ctypes/_endian.py b/python/Lib/ctypes/_endian.py deleted file mode 100755 index c0ba646ffc..0000000000 --- a/python/Lib/ctypes/_endian.py +++ /dev/null @@ -1,61 +0,0 @@ -import sys -from ctypes import * - -_array_type = type(Array) - -def _other_endian(typ): - """Return the type with the 'other' byte order. Simple types like - c_int and so on already have __ctype_be__ and __ctype_le__ - attributes which contain the types, for more complicated types - arrays and structures are supported. - """ - # check _OTHER_ENDIAN attribute (present if typ is primitive type) - if hasattr(typ, _OTHER_ENDIAN): - return getattr(typ, _OTHER_ENDIAN) - # if typ is array - if isinstance(typ, _array_type): - return _other_endian(typ._type_) * typ._length_ - # if typ is structure - if issubclass(typ, Structure): - return typ - raise TypeError("This type does not support other endian: %s" % typ) - -class _swapped_meta(type(Structure)): - def __setattr__(self, attrname, value): - if attrname == "_fields_": - fields = [] - for desc in value: - name = desc[0] - typ = desc[1] - rest = desc[2:] - fields.append((name, _other_endian(typ)) + rest) - value = fields - super(_swapped_meta, self).__setattr__(attrname, value) - -################################################################ - -# Note: The Structure metaclass checks for the *presence* (not the -# value!) of a _swapped_bytes_ attribute to determine the bit order in -# structures containing bit fields. 
- -if sys.byteorder == "little": - _OTHER_ENDIAN = "__ctype_be__" - - LittleEndianStructure = Structure - - class BigEndianStructure(Structure): - """Structure with big endian byte order""" - __metaclass__ = _swapped_meta - _swappedbytes_ = None - -elif sys.byteorder == "big": - _OTHER_ENDIAN = "__ctype_le__" - - BigEndianStructure = Structure - class LittleEndianStructure(Structure): - """Structure with little endian byte order""" - __metaclass__ = _swapped_meta - _swappedbytes_ = None - -else: - raise RuntimeError("Invalid byteorder") diff --git a/python/Lib/ctypes/macholib/__init__.py b/python/Lib/ctypes/macholib/__init__.py deleted file mode 100755 index 5621defccd..0000000000 --- a/python/Lib/ctypes/macholib/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -Enough Mach-O to make your head spin. - -See the relevant header files in /usr/include/mach-o - -And also Apple's documentation. -""" - -__version__ = '1.0' diff --git a/python/Lib/ctypes/macholib/dyld.py b/python/Lib/ctypes/macholib/dyld.py deleted file mode 100755 index 1fdf8d648f..0000000000 --- a/python/Lib/ctypes/macholib/dyld.py +++ /dev/null @@ -1,166 +0,0 @@ -""" -dyld emulation -""" - -import os -from framework import framework_info -from dylib import dylib_info -from itertools import * - -__all__ = [ - 'dyld_find', 'framework_find', - 'framework_info', 'dylib_info', -] - -# These are the defaults as per man dyld(1) -# -DEFAULT_FRAMEWORK_FALLBACK = [ - os.path.expanduser("~/Library/Frameworks"), - "/Library/Frameworks", - "/Network/Library/Frameworks", - "/System/Library/Frameworks", -] - -DEFAULT_LIBRARY_FALLBACK = [ - os.path.expanduser("~/lib"), - "/usr/local/lib", - "/lib", - "/usr/lib", -] - -def ensure_utf8(s): - """Not all of PyObjC and Python understand unicode paths very well yet""" - if isinstance(s, unicode): - return s.encode('utf8') - return s - -def dyld_env(env, var): - if env is None: - env = os.environ - rval = env.get(var) - if rval is None: - return [] - return rval.split(':') - 
-def dyld_image_suffix(env=None): - if env is None: - env = os.environ - return env.get('DYLD_IMAGE_SUFFIX') - -def dyld_framework_path(env=None): - return dyld_env(env, 'DYLD_FRAMEWORK_PATH') - -def dyld_library_path(env=None): - return dyld_env(env, 'DYLD_LIBRARY_PATH') - -def dyld_fallback_framework_path(env=None): - return dyld_env(env, 'DYLD_FALLBACK_FRAMEWORK_PATH') - -def dyld_fallback_library_path(env=None): - return dyld_env(env, 'DYLD_FALLBACK_LIBRARY_PATH') - -def dyld_image_suffix_search(iterator, env=None): - """For a potential path iterator, add DYLD_IMAGE_SUFFIX semantics""" - suffix = dyld_image_suffix(env) - if suffix is None: - return iterator - def _inject(iterator=iterator, suffix=suffix): - for path in iterator: - if path.endswith('.dylib'): - yield path[:-len('.dylib')] + suffix + '.dylib' - else: - yield path + suffix - yield path - return _inject() - -def dyld_override_search(name, env=None): - # If DYLD_FRAMEWORK_PATH is set and this dylib_name is a - # framework name, use the first file that exists in the framework - # path if any. If there is none go on to search the DYLD_LIBRARY_PATH - # if any. - - framework = framework_info(name) - - if framework is not None: - for path in dyld_framework_path(env): - yield os.path.join(path, framework['name']) - - # If DYLD_LIBRARY_PATH is set then use the first file that exists - # in the path. If none use the original name. - for path in dyld_library_path(env): - yield os.path.join(path, os.path.basename(name)) - -def dyld_executable_path_search(name, executable_path=None): - # If we haven't done any searching and found a library and the - # dylib_name starts with "@executable_path/" then construct the - # library name. 
- if name.startswith('@executable_path/') and executable_path is not None: - yield os.path.join(executable_path, name[len('@executable_path/'):]) - -def dyld_default_search(name, env=None): - yield name - - framework = framework_info(name) - - if framework is not None: - fallback_framework_path = dyld_fallback_framework_path(env) - for path in fallback_framework_path: - yield os.path.join(path, framework['name']) - - fallback_library_path = dyld_fallback_library_path(env) - for path in fallback_library_path: - yield os.path.join(path, os.path.basename(name)) - - if framework is not None and not fallback_framework_path: - for path in DEFAULT_FRAMEWORK_FALLBACK: - yield os.path.join(path, framework['name']) - - if not fallback_library_path: - for path in DEFAULT_LIBRARY_FALLBACK: - yield os.path.join(path, os.path.basename(name)) - -def dyld_find(name, executable_path=None, env=None): - """ - Find a library or framework using dyld semantics - """ - name = ensure_utf8(name) - executable_path = ensure_utf8(executable_path) - for path in dyld_image_suffix_search(chain( - dyld_override_search(name, env), - dyld_executable_path_search(name, executable_path), - dyld_default_search(name, env), - ), env): - if os.path.isfile(path): - return path - raise ValueError("dylib %s could not be found" % (name,)) - -def framework_find(fn, executable_path=None, env=None): - """ - Find a framework using dyld semantics in a very loose manner. 
- - Will take input such as: - Python - Python.framework - Python.framework/Versions/Current - """ - try: - return dyld_find(fn, executable_path=executable_path, env=env) - except ValueError, e: - pass - fmwk_index = fn.rfind('.framework') - if fmwk_index == -1: - fmwk_index = len(fn) - fn += '.framework' - fn = os.path.join(fn, os.path.basename(fn[:fmwk_index])) - try: - return dyld_find(fn, executable_path=executable_path, env=env) - except ValueError: - raise e - -def test_dyld_find(): - env = {} - assert dyld_find('libSystem.dylib') == '/usr/lib/libSystem.dylib' - assert dyld_find('System.framework/System') == '/System/Library/Frameworks/System.framework/System' - -if __name__ == '__main__': - test_dyld_find() diff --git a/python/Lib/ctypes/macholib/dylib.py b/python/Lib/ctypes/macholib/dylib.py deleted file mode 100755 index aa107507bd..0000000000 --- a/python/Lib/ctypes/macholib/dylib.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Generic dylib path manipulation -""" - -import re - -__all__ = ['dylib_info'] - -DYLIB_RE = re.compile(r"""(?x) -(?P^.*)(?:^|/) -(?P - (?P\w+?) - (?:\.(?P[^._]+))? - (?:_(?P[^._]+))? - \.dylib$ -) -""") - -def dylib_info(filename): - """ - A dylib name can take one of the following four forms: - Location/Name.SomeVersion_Suffix.dylib - Location/Name.SomeVersion.dylib - Location/Name_Suffix.dylib - Location/Name.dylib - - returns None if not found or a mapping equivalent to: - dict( - location='Location', - name='Name.SomeVersion_Suffix.dylib', - shortname='Name', - version='SomeVersion', - suffix='Suffix', - ) - - Note that SomeVersion and Suffix are optional and may be None - if not present. 
- """ - is_dylib = DYLIB_RE.match(filename) - if not is_dylib: - return None - return is_dylib.groupdict() - - -def test_dylib_info(): - def d(location=None, name=None, shortname=None, version=None, suffix=None): - return dict( - location=location, - name=name, - shortname=shortname, - version=version, - suffix=suffix - ) - assert dylib_info('completely/invalid') is None - assert dylib_info('completely/invalide_debug') is None - assert dylib_info('P/Foo.dylib') == d('P', 'Foo.dylib', 'Foo') - assert dylib_info('P/Foo_debug.dylib') == d('P', 'Foo_debug.dylib', 'Foo', suffix='debug') - assert dylib_info('P/Foo.A.dylib') == d('P', 'Foo.A.dylib', 'Foo', 'A') - assert dylib_info('P/Foo_debug.A.dylib') == d('P', 'Foo_debug.A.dylib', 'Foo_debug', 'A') - assert dylib_info('P/Foo.A_debug.dylib') == d('P', 'Foo.A_debug.dylib', 'Foo', 'A', 'debug') - -if __name__ == '__main__': - test_dylib_info() diff --git a/python/Lib/ctypes/macholib/framework.py b/python/Lib/ctypes/macholib/framework.py deleted file mode 100755 index ad6ed554ba..0000000000 --- a/python/Lib/ctypes/macholib/framework.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -Generic framework path manipulation -""" - -import re - -__all__ = ['framework_info'] - -STRICT_FRAMEWORK_RE = re.compile(r"""(?x) -(?P^.*)(?:^|/) -(?P - (?P\w+).framework/ - (?:Versions/(?P[^/]+)/)? - (?P=shortname) - (?:_(?P[^_]+))? 
-)$ -""") - -def framework_info(filename): - """ - A framework name can take one of the following four forms: - Location/Name.framework/Versions/SomeVersion/Name_Suffix - Location/Name.framework/Versions/SomeVersion/Name - Location/Name.framework/Name_Suffix - Location/Name.framework/Name - - returns None if not found, or a mapping equivalent to: - dict( - location='Location', - name='Name.framework/Versions/SomeVersion/Name_Suffix', - shortname='Name', - version='SomeVersion', - suffix='Suffix', - ) - - Note that SomeVersion and Suffix are optional and may be None - if not present - """ - is_framework = STRICT_FRAMEWORK_RE.match(filename) - if not is_framework: - return None - return is_framework.groupdict() - -def test_framework_info(): - def d(location=None, name=None, shortname=None, version=None, suffix=None): - return dict( - location=location, - name=name, - shortname=shortname, - version=version, - suffix=suffix - ) - assert framework_info('completely/invalid') is None - assert framework_info('completely/invalid/_debug') is None - assert framework_info('P/F.framework') is None - assert framework_info('P/F.framework/_debug') is None - assert framework_info('P/F.framework/F') == d('P', 'F.framework/F', 'F') - assert framework_info('P/F.framework/F_debug') == d('P', 'F.framework/F_debug', 'F', suffix='debug') - assert framework_info('P/F.framework/Versions') is None - assert framework_info('P/F.framework/Versions/A') is None - assert framework_info('P/F.framework/Versions/A/F') == d('P', 'F.framework/Versions/A/F', 'F', 'A') - assert framework_info('P/F.framework/Versions/A/F_debug') == d('P', 'F.framework/Versions/A/F_debug', 'F', 'A', 'debug') - -if __name__ == '__main__': - test_framework_info() diff --git a/python/Lib/ctypes/util.py b/python/Lib/ctypes/util.py deleted file mode 100755 index 8ef7ee2a5a..0000000000 --- a/python/Lib/ctypes/util.py +++ /dev/null @@ -1,308 +0,0 @@ -import os -import subprocess -import sys - -# find_library(name) returns the 
pathname of a library, or None. -if os.name == "nt": - - def _get_build_version(): - """Return the version of MSVC that was used to build Python. - - For Python 2.3 and up, the version number is included in - sys.version. For earlier versions, assume the compiler is MSVC 6. - """ - # This function was copied from Lib/distutils/msvccompiler.py - prefix = "MSC v." - i = sys.version.find(prefix) - if i == -1: - return 6 - i = i + len(prefix) - s, rest = sys.version[i:].split(" ", 1) - majorVersion = int(s[:-2]) - 6 - minorVersion = int(s[2:3]) / 10.0 - # I don't think paths are affected by minor version in version 6 - if majorVersion == 6: - minorVersion = 0 - if majorVersion >= 6: - return majorVersion + minorVersion - # else we don't know what version of the compiler this is - return None - - def find_msvcrt(): - """Return the name of the VC runtime dll""" - version = _get_build_version() - if version is None: - # better be safe than sorry - return None - if version <= 6: - clibname = 'msvcrt' - else: - clibname = 'msvcr%d' % (version * 10) - - # If python was built with in debug mode - import imp - if imp.get_suffixes()[0][0] == '_d.pyd': - clibname += 'd' - return clibname+'.dll' - - def find_library(name): - if name in ('c', 'm'): - return find_msvcrt() - # See MSDN for the REAL search order. - for directory in os.environ['PATH'].split(os.pathsep): - fname = os.path.join(directory, name) - if os.path.isfile(fname): - return fname - if fname.lower().endswith(".dll"): - continue - fname = fname + ".dll" - if os.path.isfile(fname): - return fname - return None - -if os.name == "ce": - # search path according to MSDN: - # - absolute path specified by filename - # - The .exe launch directory - # - the Windows directory - # - ROM dll files (where are they?) 
- # - OEM specified search path: HKLM\Loader\SystemPath - def find_library(name): - return name - -if os.name == "posix" and sys.platform == "darwin": - from ctypes.macholib.dyld import dyld_find as _dyld_find - def find_library(name): - possible = ['lib%s.dylib' % name, - '%s.dylib' % name, - '%s.framework/%s' % (name, name)] - for name in possible: - try: - return _dyld_find(name) - except ValueError: - continue - return None - -elif os.name == "posix": - # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump - import re, tempfile, errno - - def _findLib_gcc(name): - # Run GCC's linker with the -t (aka --trace) option and examine the - # library name it prints out. The GCC command will fail because we - # haven't supplied a proper program with main(), but that does not - # matter. - expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name) - cmd = 'if type gcc >/dev/null 2>&1; then CC=gcc; elif type cc >/dev/null 2>&1; then CC=cc;else exit; fi;' \ - 'LANG=C LC_ALL=C $CC -Wl,-t -o "$2" 2>&1 -l"$1"' - - temp = tempfile.NamedTemporaryFile() - try: - proc = subprocess.Popen((cmd, '_findLib_gcc', name, temp.name), - shell=True, - stdout=subprocess.PIPE) - [trace, _] = proc.communicate() - finally: - try: - temp.close() - except OSError, e: - # ENOENT is raised if the file was already removed, which is - # the normal behaviour of GCC if linking fails - if e.errno != errno.ENOENT: - raise - res = re.search(expr, trace) - if not res: - return None - return res.group(0) - - - if sys.platform == "sunos5": - # use /usr/ccs/bin/dump on solaris - def _get_soname(f): - if not f: - return None - - null = open(os.devnull, "wb") - try: - with null: - proc = subprocess.Popen(("/usr/ccs/bin/dump", "-Lpv", f), - stdout=subprocess.PIPE, - stderr=null) - except OSError: # E.g. 
command not found - return None - [data, _] = proc.communicate() - res = re.search(br'\[.*\]\sSONAME\s+([^\s]+)', data) - if not res: - return None - return res.group(1) - else: - def _get_soname(f): - # assuming GNU binutils / ELF - if not f: - return None - cmd = 'if ! type objdump >/dev/null 2>&1; then exit; fi;' \ - 'objdump -p -j .dynamic 2>/dev/null "$1"' - proc = subprocess.Popen((cmd, '_get_soname', f), shell=True, - stdout=subprocess.PIPE) - [dump, _] = proc.communicate() - res = re.search(br'\sSONAME\s+([^\s]+)', dump) - if not res: - return None - return res.group(1) - - if (sys.platform.startswith("freebsd") - or sys.platform.startswith("openbsd") - or sys.platform.startswith("dragonfly")): - - def _num_version(libname): - # "libxyz.so.MAJOR.MINOR" => [ MAJOR, MINOR ] - parts = libname.split(b".") - nums = [] - try: - while parts: - nums.insert(0, int(parts.pop())) - except ValueError: - pass - return nums or [sys.maxint] - - def find_library(name): - ename = re.escape(name) - expr = r':-l%s\.\S+ => \S*/(lib%s\.\S+)' % (ename, ename) - - null = open(os.devnull, 'wb') - try: - with null: - proc = subprocess.Popen(('/sbin/ldconfig', '-r'), - stdout=subprocess.PIPE, - stderr=null) - except OSError: # E.g. command not found - data = b'' - else: - [data, _] = proc.communicate() - - res = re.findall(expr, data) - if not res: - return _get_soname(_findLib_gcc(name)) - res.sort(key=_num_version) - return res[-1] - - elif sys.platform == "sunos5": - - def _findLib_crle(name, is64): - if not os.path.exists('/usr/bin/crle'): - return None - - env = dict(os.environ) - env['LC_ALL'] = 'C' - - if is64: - args = ('/usr/bin/crle', '-64') - else: - args = ('/usr/bin/crle',) - - paths = None - null = open(os.devnull, 'wb') - try: - with null: - proc = subprocess.Popen(args, - stdout=subprocess.PIPE, - stderr=null, - env=env) - except OSError: # E.g. 
bad executable - return None - try: - for line in proc.stdout: - line = line.strip() - if line.startswith(b'Default Library Path (ELF):'): - paths = line.split()[4] - finally: - proc.stdout.close() - proc.wait() - - if not paths: - return None - - for dir in paths.split(":"): - libfile = os.path.join(dir, "lib%s.so" % name) - if os.path.exists(libfile): - return libfile - - return None - - def find_library(name, is64 = False): - return _get_soname(_findLib_crle(name, is64) or _findLib_gcc(name)) - - else: - - def _findSoname_ldconfig(name): - import struct - if struct.calcsize('l') == 4: - machine = os.uname()[4] + '-32' - else: - machine = os.uname()[4] + '-64' - mach_map = { - 'x86_64-64': 'libc6,x86-64', - 'ppc64-64': 'libc6,64bit', - 'sparc64-64': 'libc6,64bit', - 's390x-64': 'libc6,64bit', - 'ia64-64': 'libc6,IA-64', - } - abi_type = mach_map.get(machine, 'libc6') - - # XXX assuming GLIBC's ldconfig (with option -p) - expr = r'\s+(lib%s\.[^\s]+)\s+\(%s' % (re.escape(name), abi_type) - - env = dict(os.environ) - env['LC_ALL'] = 'C' - env['LANG'] = 'C' - null = open(os.devnull, 'wb') - try: - with null: - p = subprocess.Popen(['/sbin/ldconfig', '-p'], - stderr=null, - stdout=subprocess.PIPE, - env=env) - except OSError: # E.g. 
command not found - return None - [data, _] = p.communicate() - res = re.search(expr, data) - if not res: - return None - return res.group(1) - - def find_library(name): - return _findSoname_ldconfig(name) or _get_soname(_findLib_gcc(name)) - -################################################################ -# test code - -def test(): - from ctypes import cdll - if os.name == "nt": - print cdll.msvcrt - print cdll.load("msvcrt") - print find_library("msvcrt") - - if os.name == "posix": - # find and load_version - print find_library("m") - print find_library("c") - print find_library("bz2") - - # getattr -## print cdll.m -## print cdll.bz2 - - # load - if sys.platform == "darwin": - print cdll.LoadLibrary("libm.dylib") - print cdll.LoadLibrary("libcrypto.dylib") - print cdll.LoadLibrary("libSystem.dylib") - print cdll.LoadLibrary("System.framework/System") - else: - print cdll.LoadLibrary("libm.so") - print cdll.LoadLibrary("libcrypt.so") - print find_library("crypt") - -if __name__ == "__main__": - test() diff --git a/python/Lib/ctypes/wintypes.py b/python/Lib/ctypes/wintypes.py deleted file mode 100755 index e7f569c9b6..0000000000 --- a/python/Lib/ctypes/wintypes.py +++ /dev/null @@ -1,181 +0,0 @@ -# The most useful windows datatypes -from ctypes import * - -BYTE = c_byte -WORD = c_ushort -DWORD = c_ulong - -WCHAR = c_wchar -UINT = c_uint -INT = c_int - -DOUBLE = c_double -FLOAT = c_float - -BOOLEAN = BYTE -BOOL = c_long - -from ctypes import _SimpleCData -class VARIANT_BOOL(_SimpleCData): - _type_ = "v" - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.value) - -ULONG = c_ulong -LONG = c_long - -USHORT = c_ushort -SHORT = c_short - -# in the windows header files, these are structures. 
-_LARGE_INTEGER = LARGE_INTEGER = c_longlong -_ULARGE_INTEGER = ULARGE_INTEGER = c_ulonglong - -LPCOLESTR = LPOLESTR = OLESTR = c_wchar_p -LPCWSTR = LPWSTR = c_wchar_p -LPCSTR = LPSTR = c_char_p -LPCVOID = LPVOID = c_void_p - -# WPARAM is defined as UINT_PTR (unsigned type) -# LPARAM is defined as LONG_PTR (signed type) -if sizeof(c_long) == sizeof(c_void_p): - WPARAM = c_ulong - LPARAM = c_long -elif sizeof(c_longlong) == sizeof(c_void_p): - WPARAM = c_ulonglong - LPARAM = c_longlong - -ATOM = WORD -LANGID = WORD - -COLORREF = DWORD -LGRPID = DWORD -LCTYPE = DWORD - -LCID = DWORD - -################################################################ -# HANDLE types -HANDLE = c_void_p # in the header files: void * - -HACCEL = HANDLE -HBITMAP = HANDLE -HBRUSH = HANDLE -HCOLORSPACE = HANDLE -HDC = HANDLE -HDESK = HANDLE -HDWP = HANDLE -HENHMETAFILE = HANDLE -HFONT = HANDLE -HGDIOBJ = HANDLE -HGLOBAL = HANDLE -HHOOK = HANDLE -HICON = HANDLE -HINSTANCE = HANDLE -HKEY = HANDLE -HKL = HANDLE -HLOCAL = HANDLE -HMENU = HANDLE -HMETAFILE = HANDLE -HMODULE = HANDLE -HMONITOR = HANDLE -HPALETTE = HANDLE -HPEN = HANDLE -HRGN = HANDLE -HRSRC = HANDLE -HSTR = HANDLE -HTASK = HANDLE -HWINSTA = HANDLE -HWND = HANDLE -SC_HANDLE = HANDLE -SERVICE_STATUS_HANDLE = HANDLE - -################################################################ -# Some important structure definitions - -class RECT(Structure): - _fields_ = [("left", c_long), - ("top", c_long), - ("right", c_long), - ("bottom", c_long)] -tagRECT = _RECTL = RECTL = RECT - -class _SMALL_RECT(Structure): - _fields_ = [('Left', c_short), - ('Top', c_short), - ('Right', c_short), - ('Bottom', c_short)] -SMALL_RECT = _SMALL_RECT - -class _COORD(Structure): - _fields_ = [('X', c_short), - ('Y', c_short)] - -class POINT(Structure): - _fields_ = [("x", c_long), - ("y", c_long)] -tagPOINT = _POINTL = POINTL = POINT - -class SIZE(Structure): - _fields_ = [("cx", c_long), - ("cy", c_long)] -tagSIZE = SIZEL = SIZE - -def RGB(red, green, 
blue): - return red + (green << 8) + (blue << 16) - -class FILETIME(Structure): - _fields_ = [("dwLowDateTime", DWORD), - ("dwHighDateTime", DWORD)] -_FILETIME = FILETIME - -class MSG(Structure): - _fields_ = [("hWnd", HWND), - ("message", c_uint), - ("wParam", WPARAM), - ("lParam", LPARAM), - ("time", DWORD), - ("pt", POINT)] -tagMSG = MSG -MAX_PATH = 260 - -class WIN32_FIND_DATAA(Structure): - _fields_ = [("dwFileAttributes", DWORD), - ("ftCreationTime", FILETIME), - ("ftLastAccessTime", FILETIME), - ("ftLastWriteTime", FILETIME), - ("nFileSizeHigh", DWORD), - ("nFileSizeLow", DWORD), - ("dwReserved0", DWORD), - ("dwReserved1", DWORD), - ("cFileName", c_char * MAX_PATH), - ("cAlternateFileName", c_char * 14)] - -class WIN32_FIND_DATAW(Structure): - _fields_ = [("dwFileAttributes", DWORD), - ("ftCreationTime", FILETIME), - ("ftLastAccessTime", FILETIME), - ("ftLastWriteTime", FILETIME), - ("nFileSizeHigh", DWORD), - ("nFileSizeLow", DWORD), - ("dwReserved0", DWORD), - ("dwReserved1", DWORD), - ("cFileName", c_wchar * MAX_PATH), - ("cAlternateFileName", c_wchar * 14)] - -__all__ = ['ATOM', 'BOOL', 'BOOLEAN', 'BYTE', 'COLORREF', 'DOUBLE', 'DWORD', - 'FILETIME', 'FLOAT', 'HACCEL', 'HANDLE', 'HBITMAP', 'HBRUSH', - 'HCOLORSPACE', 'HDC', 'HDESK', 'HDWP', 'HENHMETAFILE', 'HFONT', - 'HGDIOBJ', 'HGLOBAL', 'HHOOK', 'HICON', 'HINSTANCE', 'HKEY', - 'HKL', 'HLOCAL', 'HMENU', 'HMETAFILE', 'HMODULE', 'HMONITOR', - 'HPALETTE', 'HPEN', 'HRGN', 'HRSRC', 'HSTR', 'HTASK', 'HWINSTA', - 'HWND', 'INT', 'LANGID', 'LARGE_INTEGER', 'LCID', 'LCTYPE', - 'LGRPID', 'LONG', 'LPARAM', 'LPCOLESTR', 'LPCSTR', 'LPCVOID', - 'LPCWSTR', 'LPOLESTR', 'LPSTR', 'LPVOID', 'LPWSTR', 'MAX_PATH', - 'MSG', 'OLESTR', 'POINT', 'POINTL', 'RECT', 'RECTL', 'RGB', - 'SC_HANDLE', 'SERVICE_STATUS_HANDLE', 'SHORT', 'SIZE', 'SIZEL', - 'SMALL_RECT', 'UINT', 'ULARGE_INTEGER', 'ULONG', 'USHORT', - 'VARIANT_BOOL', 'WCHAR', 'WIN32_FIND_DATAA', 'WIN32_FIND_DATAW', - 'WORD', 'WPARAM', '_COORD', '_FILETIME', '_LARGE_INTEGER', - 
'_POINTL', '_RECTL', '_SMALL_RECT', '_ULARGE_INTEGER', 'tagMSG', - 'tagPOINT', 'tagRECT', 'tagSIZE'] diff --git a/python/Lib/curses/__init__.py b/python/Lib/curses/__init__.py deleted file mode 100755 index ecf59de37d..0000000000 --- a/python/Lib/curses/__init__.py +++ /dev/null @@ -1,59 +0,0 @@ -"""curses - -The main package for curses support for Python. Normally used by importing -the package, and perhaps a particular module inside it. - - import curses - from curses import textpad - curses.initscr() - ... - -""" - -__revision__ = "$Id$" - -from _curses import * -from curses.wrapper import wrapper -import os as _os -import sys as _sys - -# Some constants, most notably the ACS_* ones, are only added to the C -# _curses module's dictionary after initscr() is called. (Some -# versions of SGI's curses don't define values for those constants -# until initscr() has been called.) This wrapper function calls the -# underlying C initscr(), and then copies the constants from the -# _curses module to the curses package's dictionary. Don't do 'from -# curses import *' if you'll be needing the ACS_* constants. - -def initscr(): - import _curses, curses - # we call setupterm() here because it raises an error - # instead of calling exit() in error cases. - setupterm(term=_os.environ.get("TERM", "unknown"), - fd=_sys.__stdout__.fileno()) - stdscr = _curses.initscr() - for key, value in _curses.__dict__.items(): - if key[0:4] == 'ACS_' or key in ('LINES', 'COLS'): - setattr(curses, key, value) - - return stdscr - -# This is a similar wrapper for start_color(), which adds the COLORS and -# COLOR_PAIRS variables which are only available after start_color() is -# called. 
- -def start_color(): - import _curses, curses - retval = _curses.start_color() - if hasattr(_curses, 'COLORS'): - curses.COLORS = _curses.COLORS - if hasattr(_curses, 'COLOR_PAIRS'): - curses.COLOR_PAIRS = _curses.COLOR_PAIRS - return retval - -# Import Python has_key() implementation if _curses doesn't contain has_key() - -try: - has_key -except NameError: - from has_key import has_key diff --git a/python/Lib/curses/ascii.py b/python/Lib/curses/ascii.py deleted file mode 100755 index a88f38b752..0000000000 --- a/python/Lib/curses/ascii.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Constants and membership tests for ASCII characters""" - -NUL = 0x00 # ^@ -SOH = 0x01 # ^A -STX = 0x02 # ^B -ETX = 0x03 # ^C -EOT = 0x04 # ^D -ENQ = 0x05 # ^E -ACK = 0x06 # ^F -BEL = 0x07 # ^G -BS = 0x08 # ^H -TAB = 0x09 # ^I -HT = 0x09 # ^I -LF = 0x0a # ^J -NL = 0x0a # ^J -VT = 0x0b # ^K -FF = 0x0c # ^L -CR = 0x0d # ^M -SO = 0x0e # ^N -SI = 0x0f # ^O -DLE = 0x10 # ^P -DC1 = 0x11 # ^Q -DC2 = 0x12 # ^R -DC3 = 0x13 # ^S -DC4 = 0x14 # ^T -NAK = 0x15 # ^U -SYN = 0x16 # ^V -ETB = 0x17 # ^W -CAN = 0x18 # ^X -EM = 0x19 # ^Y -SUB = 0x1a # ^Z -ESC = 0x1b # ^[ -FS = 0x1c # ^\ -GS = 0x1d # ^] -RS = 0x1e # ^^ -US = 0x1f # ^_ -SP = 0x20 # space -DEL = 0x7f # delete - -controlnames = [ -"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", -"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI", -"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", -"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", -"SP" -] - -def _ctoi(c): - if type(c) == type(""): - return ord(c) - else: - return c - -def isalnum(c): return isalpha(c) or isdigit(c) -def isalpha(c): return isupper(c) or islower(c) -def isascii(c): return _ctoi(c) <= 127 # ? 
-def isblank(c): return _ctoi(c) in (9, 32) -def iscntrl(c): return _ctoi(c) <= 31 or _ctoi(c) == 127 -def isdigit(c): return _ctoi(c) >= 48 and _ctoi(c) <= 57 -def isgraph(c): return _ctoi(c) >= 33 and _ctoi(c) <= 126 -def islower(c): return _ctoi(c) >= 97 and _ctoi(c) <= 122 -def isprint(c): return _ctoi(c) >= 32 and _ctoi(c) <= 126 -def ispunct(c): return isgraph(c) and not isalnum(c) -def isspace(c): return _ctoi(c) in (9, 10, 11, 12, 13, 32) -def isupper(c): return _ctoi(c) >= 65 and _ctoi(c) <= 90 -def isxdigit(c): return isdigit(c) or \ - (_ctoi(c) >= 65 and _ctoi(c) <= 70) or (_ctoi(c) >= 97 and _ctoi(c) <= 102) -def isctrl(c): return _ctoi(c) < 32 -def ismeta(c): return _ctoi(c) > 127 - -def ascii(c): - if type(c) == type(""): - return chr(_ctoi(c) & 0x7f) - else: - return _ctoi(c) & 0x7f - -def ctrl(c): - if type(c) == type(""): - return chr(_ctoi(c) & 0x1f) - else: - return _ctoi(c) & 0x1f - -def alt(c): - if type(c) == type(""): - return chr(_ctoi(c) | 0x80) - else: - return _ctoi(c) | 0x80 - -def unctrl(c): - bits = _ctoi(c) - if bits == 0x7f: - rep = "^?" - elif isprint(bits & 0x7f): - rep = chr(bits & 0x7f) - else: - rep = "^" + chr(((bits & 0x7f) | 0x20) + 0x20) - if bits & 0x80: - return "!" 
+ rep - return rep diff --git a/python/Lib/curses/has_key.py b/python/Lib/curses/has_key.py deleted file mode 100755 index 1dd5a3bd4a..0000000000 --- a/python/Lib/curses/has_key.py +++ /dev/null @@ -1,192 +0,0 @@ - -# -# Emulation of has_key() function for platforms that don't use ncurses -# - -import _curses - -# Table mapping curses keys to the terminfo capability name - -_capability_names = { - _curses.KEY_A1: 'ka1', - _curses.KEY_A3: 'ka3', - _curses.KEY_B2: 'kb2', - _curses.KEY_BACKSPACE: 'kbs', - _curses.KEY_BEG: 'kbeg', - _curses.KEY_BTAB: 'kcbt', - _curses.KEY_C1: 'kc1', - _curses.KEY_C3: 'kc3', - _curses.KEY_CANCEL: 'kcan', - _curses.KEY_CATAB: 'ktbc', - _curses.KEY_CLEAR: 'kclr', - _curses.KEY_CLOSE: 'kclo', - _curses.KEY_COMMAND: 'kcmd', - _curses.KEY_COPY: 'kcpy', - _curses.KEY_CREATE: 'kcrt', - _curses.KEY_CTAB: 'kctab', - _curses.KEY_DC: 'kdch1', - _curses.KEY_DL: 'kdl1', - _curses.KEY_DOWN: 'kcud1', - _curses.KEY_EIC: 'krmir', - _curses.KEY_END: 'kend', - _curses.KEY_ENTER: 'kent', - _curses.KEY_EOL: 'kel', - _curses.KEY_EOS: 'ked', - _curses.KEY_EXIT: 'kext', - _curses.KEY_F0: 'kf0', - _curses.KEY_F1: 'kf1', - _curses.KEY_F10: 'kf10', - _curses.KEY_F11: 'kf11', - _curses.KEY_F12: 'kf12', - _curses.KEY_F13: 'kf13', - _curses.KEY_F14: 'kf14', - _curses.KEY_F15: 'kf15', - _curses.KEY_F16: 'kf16', - _curses.KEY_F17: 'kf17', - _curses.KEY_F18: 'kf18', - _curses.KEY_F19: 'kf19', - _curses.KEY_F2: 'kf2', - _curses.KEY_F20: 'kf20', - _curses.KEY_F21: 'kf21', - _curses.KEY_F22: 'kf22', - _curses.KEY_F23: 'kf23', - _curses.KEY_F24: 'kf24', - _curses.KEY_F25: 'kf25', - _curses.KEY_F26: 'kf26', - _curses.KEY_F27: 'kf27', - _curses.KEY_F28: 'kf28', - _curses.KEY_F29: 'kf29', - _curses.KEY_F3: 'kf3', - _curses.KEY_F30: 'kf30', - _curses.KEY_F31: 'kf31', - _curses.KEY_F32: 'kf32', - _curses.KEY_F33: 'kf33', - _curses.KEY_F34: 'kf34', - _curses.KEY_F35: 'kf35', - _curses.KEY_F36: 'kf36', - _curses.KEY_F37: 'kf37', - _curses.KEY_F38: 'kf38', - _curses.KEY_F39: 
'kf39', - _curses.KEY_F4: 'kf4', - _curses.KEY_F40: 'kf40', - _curses.KEY_F41: 'kf41', - _curses.KEY_F42: 'kf42', - _curses.KEY_F43: 'kf43', - _curses.KEY_F44: 'kf44', - _curses.KEY_F45: 'kf45', - _curses.KEY_F46: 'kf46', - _curses.KEY_F47: 'kf47', - _curses.KEY_F48: 'kf48', - _curses.KEY_F49: 'kf49', - _curses.KEY_F5: 'kf5', - _curses.KEY_F50: 'kf50', - _curses.KEY_F51: 'kf51', - _curses.KEY_F52: 'kf52', - _curses.KEY_F53: 'kf53', - _curses.KEY_F54: 'kf54', - _curses.KEY_F55: 'kf55', - _curses.KEY_F56: 'kf56', - _curses.KEY_F57: 'kf57', - _curses.KEY_F58: 'kf58', - _curses.KEY_F59: 'kf59', - _curses.KEY_F6: 'kf6', - _curses.KEY_F60: 'kf60', - _curses.KEY_F61: 'kf61', - _curses.KEY_F62: 'kf62', - _curses.KEY_F63: 'kf63', - _curses.KEY_F7: 'kf7', - _curses.KEY_F8: 'kf8', - _curses.KEY_F9: 'kf9', - _curses.KEY_FIND: 'kfnd', - _curses.KEY_HELP: 'khlp', - _curses.KEY_HOME: 'khome', - _curses.KEY_IC: 'kich1', - _curses.KEY_IL: 'kil1', - _curses.KEY_LEFT: 'kcub1', - _curses.KEY_LL: 'kll', - _curses.KEY_MARK: 'kmrk', - _curses.KEY_MESSAGE: 'kmsg', - _curses.KEY_MOVE: 'kmov', - _curses.KEY_NEXT: 'knxt', - _curses.KEY_NPAGE: 'knp', - _curses.KEY_OPEN: 'kopn', - _curses.KEY_OPTIONS: 'kopt', - _curses.KEY_PPAGE: 'kpp', - _curses.KEY_PREVIOUS: 'kprv', - _curses.KEY_PRINT: 'kprt', - _curses.KEY_REDO: 'krdo', - _curses.KEY_REFERENCE: 'kref', - _curses.KEY_REFRESH: 'krfr', - _curses.KEY_REPLACE: 'krpl', - _curses.KEY_RESTART: 'krst', - _curses.KEY_RESUME: 'kres', - _curses.KEY_RIGHT: 'kcuf1', - _curses.KEY_SAVE: 'ksav', - _curses.KEY_SBEG: 'kBEG', - _curses.KEY_SCANCEL: 'kCAN', - _curses.KEY_SCOMMAND: 'kCMD', - _curses.KEY_SCOPY: 'kCPY', - _curses.KEY_SCREATE: 'kCRT', - _curses.KEY_SDC: 'kDC', - _curses.KEY_SDL: 'kDL', - _curses.KEY_SELECT: 'kslt', - _curses.KEY_SEND: 'kEND', - _curses.KEY_SEOL: 'kEOL', - _curses.KEY_SEXIT: 'kEXT', - _curses.KEY_SF: 'kind', - _curses.KEY_SFIND: 'kFND', - _curses.KEY_SHELP: 'kHLP', - _curses.KEY_SHOME: 'kHOM', - _curses.KEY_SIC: 'kIC', - 
_curses.KEY_SLEFT: 'kLFT', - _curses.KEY_SMESSAGE: 'kMSG', - _curses.KEY_SMOVE: 'kMOV', - _curses.KEY_SNEXT: 'kNXT', - _curses.KEY_SOPTIONS: 'kOPT', - _curses.KEY_SPREVIOUS: 'kPRV', - _curses.KEY_SPRINT: 'kPRT', - _curses.KEY_SR: 'kri', - _curses.KEY_SREDO: 'kRDO', - _curses.KEY_SREPLACE: 'kRPL', - _curses.KEY_SRIGHT: 'kRIT', - _curses.KEY_SRSUME: 'kRES', - _curses.KEY_SSAVE: 'kSAV', - _curses.KEY_SSUSPEND: 'kSPD', - _curses.KEY_STAB: 'khts', - _curses.KEY_SUNDO: 'kUND', - _curses.KEY_SUSPEND: 'kspd', - _curses.KEY_UNDO: 'kund', - _curses.KEY_UP: 'kcuu1' - } - -def has_key(ch): - if isinstance(ch, str): - ch = ord(ch) - - # Figure out the correct capability name for the keycode. - capability_name = _capability_names.get(ch) - if capability_name is None: - return False - - #Check the current terminal description for that capability; - #if present, return true, else return false. - if _curses.tigetstr( capability_name ): - return True - else: - return False - -if __name__ == '__main__': - # Compare the output of this implementation and the ncurses has_key, - # on platforms where has_key is already available - try: - L = [] - _curses.initscr() - for key in _capability_names.keys(): - system = key in _curses - python = has_key(key) - if system != python: - L.append( 'Mismatch for key %s, system=%i, Python=%i' - % (_curses.keyname( key ), system, python) ) - finally: - _curses.endwin() - for i in L: print i diff --git a/python/Lib/curses/panel.py b/python/Lib/curses/panel.py deleted file mode 100755 index aacca85151..0000000000 --- a/python/Lib/curses/panel.py +++ /dev/null @@ -1,8 +0,0 @@ -"""curses.panel - -Module for using panels with curses. 
-""" - -__revision__ = "$Id$" - -from _curses_panel import * diff --git a/python/Lib/curses/textpad.py b/python/Lib/curses/textpad.py deleted file mode 100755 index c45361c7d2..0000000000 --- a/python/Lib/curses/textpad.py +++ /dev/null @@ -1,188 +0,0 @@ -"""Simple textbox editing widget with Emacs-like keybindings.""" - -import curses -import curses.ascii - -def rectangle(win, uly, ulx, lry, lrx): - """Draw a rectangle with corners at the provided upper-left - and lower-right coordinates. - """ - win.vline(uly+1, ulx, curses.ACS_VLINE, lry - uly - 1) - win.hline(uly, ulx+1, curses.ACS_HLINE, lrx - ulx - 1) - win.hline(lry, ulx+1, curses.ACS_HLINE, lrx - ulx - 1) - win.vline(uly+1, lrx, curses.ACS_VLINE, lry - uly - 1) - win.addch(uly, ulx, curses.ACS_ULCORNER) - win.addch(uly, lrx, curses.ACS_URCORNER) - win.addch(lry, lrx, curses.ACS_LRCORNER) - win.addch(lry, ulx, curses.ACS_LLCORNER) - -class Textbox: - """Editing widget using the interior of a window object. - Supports the following Emacs-like key bindings: - - Ctrl-A Go to left edge of window. - Ctrl-B Cursor left, wrapping to previous line if appropriate. - Ctrl-D Delete character under cursor. - Ctrl-E Go to right edge (stripspaces off) or end of line (stripspaces on). - Ctrl-F Cursor right, wrapping to next line when appropriate. - Ctrl-G Terminate, returning the window contents. - Ctrl-H Delete character backward. - Ctrl-J Terminate if the window is 1 line, otherwise insert newline. - Ctrl-K If line is blank, delete it, otherwise clear to end of line. - Ctrl-L Refresh screen. - Ctrl-N Cursor down; move down one line. - Ctrl-O Insert a blank line at cursor location. - Ctrl-P Cursor up; move up one line. - - Move operations do nothing if the cursor is at an edge where the movement - is not possible. 
The following synonyms are supported where possible: - - KEY_LEFT = Ctrl-B, KEY_RIGHT = Ctrl-F, KEY_UP = Ctrl-P, KEY_DOWN = Ctrl-N - KEY_BACKSPACE = Ctrl-h - """ - def __init__(self, win, insert_mode=False): - self.win = win - self.insert_mode = insert_mode - (self.maxy, self.maxx) = win.getmaxyx() - self.maxy = self.maxy - 1 - self.maxx = self.maxx - 1 - self.stripspaces = 1 - self.lastcmd = None - win.keypad(1) - - def _end_of_line(self, y): - """Go to the location of the first blank on the given line, - returning the index of the last non-blank character.""" - last = self.maxx - while True: - if curses.ascii.ascii(self.win.inch(y, last)) != curses.ascii.SP: - last = min(self.maxx, last+1) - break - elif last == 0: - break - last = last - 1 - return last - - def _insert_printable_char(self, ch): - (y, x) = self.win.getyx() - if y < self.maxy or x < self.maxx: - if self.insert_mode: - oldch = self.win.inch() - # The try-catch ignores the error we trigger from some curses - # versions by trying to write into the lowest-rightmost spot - # in the window. - try: - self.win.addch(ch) - except curses.error: - pass - if self.insert_mode: - (backy, backx) = self.win.getyx() - if curses.ascii.isprint(oldch): - self._insert_printable_char(oldch) - self.win.move(backy, backx) - - def do_command(self, ch): - "Process a single editing command." 
- (y, x) = self.win.getyx() - self.lastcmd = ch - if curses.ascii.isprint(ch): - if y < self.maxy or x < self.maxx: - self._insert_printable_char(ch) - elif ch == curses.ascii.SOH: # ^a - self.win.move(y, 0) - elif ch in (curses.ascii.STX,curses.KEY_LEFT, curses.ascii.BS,curses.KEY_BACKSPACE): - if x > 0: - self.win.move(y, x-1) - elif y == 0: - pass - elif self.stripspaces: - self.win.move(y-1, self._end_of_line(y-1)) - else: - self.win.move(y-1, self.maxx) - if ch in (curses.ascii.BS, curses.KEY_BACKSPACE): - self.win.delch() - elif ch == curses.ascii.EOT: # ^d - self.win.delch() - elif ch == curses.ascii.ENQ: # ^e - if self.stripspaces: - self.win.move(y, self._end_of_line(y)) - else: - self.win.move(y, self.maxx) - elif ch in (curses.ascii.ACK, curses.KEY_RIGHT): # ^f - if x < self.maxx: - self.win.move(y, x+1) - elif y == self.maxy: - pass - else: - self.win.move(y+1, 0) - elif ch == curses.ascii.BEL: # ^g - return 0 - elif ch == curses.ascii.NL: # ^j - if self.maxy == 0: - return 0 - elif y < self.maxy: - self.win.move(y+1, 0) - elif ch == curses.ascii.VT: # ^k - if x == 0 and self._end_of_line(y) == 0: - self.win.deleteln() - else: - # first undo the effect of self._end_of_line - self.win.move(y, x) - self.win.clrtoeol() - elif ch == curses.ascii.FF: # ^l - self.win.refresh() - elif ch in (curses.ascii.SO, curses.KEY_DOWN): # ^n - if y < self.maxy: - self.win.move(y+1, x) - if x > self._end_of_line(y+1): - self.win.move(y+1, self._end_of_line(y+1)) - elif ch == curses.ascii.SI: # ^o - self.win.insertln() - elif ch in (curses.ascii.DLE, curses.KEY_UP): # ^p - if y > 0: - self.win.move(y-1, x) - if x > self._end_of_line(y-1): - self.win.move(y-1, self._end_of_line(y-1)) - return 1 - - def gather(self): - "Collect and return the contents of the window." 
- result = "" - for y in range(self.maxy+1): - self.win.move(y, 0) - stop = self._end_of_line(y) - if stop == 0 and self.stripspaces: - continue - for x in range(self.maxx+1): - if self.stripspaces and x > stop: - break - result = result + chr(curses.ascii.ascii(self.win.inch(y, x))) - if self.maxy > 0: - result = result + "\n" - return result - - def edit(self, validate=None): - "Edit in the widget window and collect the results." - while 1: - ch = self.win.getch() - if validate: - ch = validate(ch) - if not ch: - continue - if not self.do_command(ch): - break - self.win.refresh() - return self.gather() - -if __name__ == '__main__': - def test_editbox(stdscr): - ncols, nlines = 9, 4 - uly, ulx = 15, 20 - stdscr.addstr(uly-2, ulx, "Use Ctrl-G to end editing.") - win = curses.newwin(nlines, ncols, uly, ulx) - rectangle(stdscr, uly-1, ulx-1, uly + nlines, ulx + ncols) - stdscr.refresh() - return Textbox(win).edit() - - str = curses.wrapper(test_editbox) - print 'Contents of text box:', repr(str) diff --git a/python/Lib/curses/wrapper.py b/python/Lib/curses/wrapper.py deleted file mode 100755 index 5183ce741f..0000000000 --- a/python/Lib/curses/wrapper.py +++ /dev/null @@ -1,50 +0,0 @@ -"""curses.wrapper - -Contains one function, wrapper(), which runs another function which -should be the rest of your curses-based application. If the -application raises an exception, wrapper() will restore the terminal -to a sane state so you can read the resulting traceback. - -""" - -import curses - -def wrapper(func, *args, **kwds): - """Wrapper function that initializes curses and calls another function, - restoring normal keyboard/screen behavior on error. - The callable object 'func' is then passed the main window 'stdscr' - as its first argument, followed by any other arguments passed to - wrapper(). 
- """ - - try: - # Initialize curses - stdscr = curses.initscr() - - # Turn off echoing of keys, and enter cbreak mode, - # where no buffering is performed on keyboard input - curses.noecho() - curses.cbreak() - - # In keypad mode, escape sequences for special keys - # (like the cursor keys) will be interpreted and - # a special value like curses.KEY_LEFT will be returned - stdscr.keypad(1) - - # Start color, too. Harmless if the terminal doesn't have - # color; user can test with has_color() later on. The try/catch - # works around a minor bit of over-conscientiousness in the curses - # module -- the error return from C start_color() is ignorable. - try: - curses.start_color() - except: - pass - - return func(stdscr, *args, **kwds) - finally: - # Set everything back to normal - if 'stdscr' in locals(): - stdscr.keypad(0) - curses.echo() - curses.nocbreak() - curses.endwin() diff --git a/python/Lib/dbhash.py b/python/Lib/dbhash.py deleted file mode 100755 index a5d5375ba9..0000000000 --- a/python/Lib/dbhash.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Provide a (g)dbm-compatible interface to bsddb.hashopen.""" - -import sys -import warnings -warnings.warnpy3k("in 3.x, the dbhash module has been removed", stacklevel=2) -try: - import bsddb -except ImportError: - # prevent a second import of this module from spuriously succeeding - del sys.modules[__name__] - raise - -__all__ = ["error","open"] - -error = bsddb.error # Exported for anydbm - -def open(file, flag = 'r', mode=0666): - return bsddb.hashopen(file, flag, mode) diff --git a/python/Lib/decimal.py b/python/Lib/decimal.py deleted file mode 100755 index 78a4daa621..0000000000 --- a/python/Lib/decimal.py +++ /dev/null @@ -1,6221 +0,0 @@ -# Copyright (c) 2004 Python Software Foundation. -# All rights reserved. 
- -# Written by Eric Price -# and Facundo Batista -# and Raymond Hettinger -# and Aahz -# and Tim Peters - -# This module is currently Py2.3 compatible and should be kept that way -# unless a major compelling advantage arises. IOW, 2.3 compatibility is -# strongly preferred, but not guaranteed. - -# Also, this module should be kept in sync with the latest updates of -# the IBM specification as it evolves. Those updates will be treated -# as bug fixes (deviation from the spec is a compatibility, usability -# bug) and will be backported. At this point the spec is stabilizing -# and the updates are becoming fewer, smaller, and less significant. - -""" -This is a Py2.3 implementation of decimal floating point arithmetic based on -the General Decimal Arithmetic Specification: - - http://speleotrove.com/decimal/decarith.html - -and IEEE standard 854-1987: - - http://en.wikipedia.org/wiki/IEEE_854-1987 - -Decimal floating point has finite precision with arbitrarily large bounds. - -The purpose of this module is to support arithmetic using familiar -"schoolhouse" rules and to avoid some of the tricky representation -issues associated with binary floating point. The package is especially -useful for financial applications or for contexts where users have -expectations that are at odds with binary floating point (for instance, -in binary floating point, 1.00 % 0.1 gives 0.09999999999999995 instead -of the expected Decimal('0.00') returned by decimal floating point). 
- -Here are some examples of using the decimal module: - ->>> from decimal import * ->>> setcontext(ExtendedContext) ->>> Decimal(0) -Decimal('0') ->>> Decimal('1') -Decimal('1') ->>> Decimal('-.0123') -Decimal('-0.0123') ->>> Decimal(123456) -Decimal('123456') ->>> Decimal('123.45e12345678901234567890') -Decimal('1.2345E+12345678901234567892') ->>> Decimal('1.33') + Decimal('1.27') -Decimal('2.60') ->>> Decimal('12.34') + Decimal('3.87') - Decimal('18.41') -Decimal('-2.20') ->>> dig = Decimal(1) ->>> print dig / Decimal(3) -0.333333333 ->>> getcontext().prec = 18 ->>> print dig / Decimal(3) -0.333333333333333333 ->>> print dig.sqrt() -1 ->>> print Decimal(3).sqrt() -1.73205080756887729 ->>> print Decimal(3) ** 123 -4.85192780976896427E+58 ->>> inf = Decimal(1) / Decimal(0) ->>> print inf -Infinity ->>> neginf = Decimal(-1) / Decimal(0) ->>> print neginf --Infinity ->>> print neginf + inf -NaN ->>> print neginf * inf --Infinity ->>> print dig / 0 -Infinity ->>> getcontext().traps[DivisionByZero] = 1 ->>> print dig / 0 -Traceback (most recent call last): - ... - ... - ... -DivisionByZero: x / 0 ->>> c = Context() ->>> c.traps[InvalidOperation] = 0 ->>> print c.flags[InvalidOperation] -0 ->>> c.divide(Decimal(0), Decimal(0)) -Decimal('NaN') ->>> c.traps[InvalidOperation] = 1 ->>> print c.flags[InvalidOperation] -1 ->>> c.flags[InvalidOperation] = 0 ->>> print c.flags[InvalidOperation] -0 ->>> print c.divide(Decimal(0), Decimal(0)) -Traceback (most recent call last): - ... - ... - ... 
-InvalidOperation: 0 / 0 ->>> print c.flags[InvalidOperation] -1 ->>> c.flags[InvalidOperation] = 0 ->>> c.traps[InvalidOperation] = 0 ->>> print c.divide(Decimal(0), Decimal(0)) -NaN ->>> print c.flags[InvalidOperation] -1 ->>> -""" - -__all__ = [ - # Two major classes - 'Decimal', 'Context', - - # Contexts - 'DefaultContext', 'BasicContext', 'ExtendedContext', - - # Exceptions - 'DecimalException', 'Clamped', 'InvalidOperation', 'DivisionByZero', - 'Inexact', 'Rounded', 'Subnormal', 'Overflow', 'Underflow', - - # Constants for use in setting up contexts - 'ROUND_DOWN', 'ROUND_HALF_UP', 'ROUND_HALF_EVEN', 'ROUND_CEILING', - 'ROUND_FLOOR', 'ROUND_UP', 'ROUND_HALF_DOWN', 'ROUND_05UP', - - # Functions for manipulating contexts - 'setcontext', 'getcontext', 'localcontext' -] - -__version__ = '1.70' # Highest version of the spec this complies with - -import math as _math -import numbers as _numbers - -try: - from collections import namedtuple as _namedtuple - DecimalTuple = _namedtuple('DecimalTuple', 'sign digits exponent') -except ImportError: - DecimalTuple = lambda *args: args - -# Rounding -ROUND_DOWN = 'ROUND_DOWN' -ROUND_HALF_UP = 'ROUND_HALF_UP' -ROUND_HALF_EVEN = 'ROUND_HALF_EVEN' -ROUND_CEILING = 'ROUND_CEILING' -ROUND_FLOOR = 'ROUND_FLOOR' -ROUND_UP = 'ROUND_UP' -ROUND_HALF_DOWN = 'ROUND_HALF_DOWN' -ROUND_05UP = 'ROUND_05UP' - -# Errors - -class DecimalException(ArithmeticError): - """Base exception class. - - Used exceptions derive from this. - If an exception derives from another exception besides this (such as - Underflow (Inexact, Rounded, Subnormal) that indicates that it is only - called if the others are present. This isn't actually used for - anything, though. - - handle -- Called when context._raise_error is called and the - trap_enabler is not set. First argument is self, second is the - context. 
More arguments can be given, those being after - the explanation in _raise_error (For example, - context._raise_error(NewError, '(-x)!', self._sign) would - call NewError().handle(context, self._sign).) - - To define a new exception, it should be sufficient to have it derive - from DecimalException. - """ - def handle(self, context, *args): - pass - - -class Clamped(DecimalException): - """Exponent of a 0 changed to fit bounds. - - This occurs and signals clamped if the exponent of a result has been - altered in order to fit the constraints of a specific concrete - representation. This may occur when the exponent of a zero result would - be outside the bounds of a representation, or when a large normal - number would have an encoded exponent that cannot be represented. In - this latter case, the exponent is reduced to fit and the corresponding - number of zero digits are appended to the coefficient ("fold-down"). - """ - -class InvalidOperation(DecimalException): - """An invalid operation was performed. - - Various bad things cause this: - - Something creates a signaling NaN - -INF + INF - 0 * (+-)INF - (+-)INF / (+-)INF - x % 0 - (+-)INF % x - x._rescale( non-integer ) - sqrt(-x) , x > 0 - 0 ** 0 - x ** (non-integer) - x ** (+-)INF - An operand is invalid - - The result of the operation after these is a quiet positive NaN, - except when the cause is a signaling NaN, in which case the result is - also a quiet NaN, but with the original sign, and an optional - diagnostic information. - """ - def handle(self, context, *args): - if args: - ans = _dec_from_triple(args[0]._sign, args[0]._int, 'n', True) - return ans._fix_nan(context) - return _NaN - -class ConversionSyntax(InvalidOperation): - """Trying to convert badly formed string. - - This occurs and signals invalid-operation if a string is being - converted to a number and it does not conform to the numeric string - syntax. The result is [0,qNaN]. 
- """ - def handle(self, context, *args): - return _NaN - -class DivisionByZero(DecimalException, ZeroDivisionError): - """Division by 0. - - This occurs and signals division-by-zero if division of a finite number - by zero was attempted (during a divide-integer or divide operation, or a - power operation with negative right-hand operand), and the dividend was - not zero. - - The result of the operation is [sign,inf], where sign is the exclusive - or of the signs of the operands for divide, or is 1 for an odd power of - -0, for power. - """ - - def handle(self, context, sign, *args): - return _SignedInfinity[sign] - -class DivisionImpossible(InvalidOperation): - """Cannot perform the division adequately. - - This occurs and signals invalid-operation if the integer result of a - divide-integer or remainder operation had too many digits (would be - longer than precision). The result is [0,qNaN]. - """ - - def handle(self, context, *args): - return _NaN - -class DivisionUndefined(InvalidOperation, ZeroDivisionError): - """Undefined result of division. - - This occurs and signals invalid-operation if division by zero was - attempted (during a divide-integer, divide, or remainder operation), and - the dividend is also zero. The result is [0,qNaN]. - """ - - def handle(self, context, *args): - return _NaN - -class Inexact(DecimalException): - """Had to round, losing information. - - This occurs and signals inexact whenever the result of an operation is - not exact (that is, it needed to be rounded and any discarded digits - were non-zero), or if an overflow or underflow condition occurs. The - result in all cases is unchanged. - - The inexact signal may be tested (or trapped) to determine if a given - operation (or sequence of operations) was inexact. - """ - -class InvalidContext(InvalidOperation): - """Invalid context. Unknown rounding, for example. - - This occurs and signals invalid-operation if an invalid context was - detected during an operation. 
This can occur if contexts are not checked - on creation and either the precision exceeds the capability of the - underlying concrete representation or an unknown or unsupported rounding - was specified. These aspects of the context need only be checked when - the values are required to be used. The result is [0,qNaN]. - """ - - def handle(self, context, *args): - return _NaN - -class Rounded(DecimalException): - """Number got rounded (not necessarily changed during rounding). - - This occurs and signals rounded whenever the result of an operation is - rounded (that is, some zero or non-zero digits were discarded from the - coefficient), or if an overflow or underflow condition occurs. The - result in all cases is unchanged. - - The rounded signal may be tested (or trapped) to determine if a given - operation (or sequence of operations) caused a loss of precision. - """ - -class Subnormal(DecimalException): - """Exponent < Emin before rounding. - - This occurs and signals subnormal whenever the result of a conversion or - operation is subnormal (that is, its adjusted exponent is less than - Emin, before any rounding). The result in all cases is unchanged. - - The subnormal signal may be tested (or trapped) to determine if a given - or operation (or sequence of operations) yielded a subnormal result. - """ - -class Overflow(Inexact, Rounded): - """Numerical overflow. - - This occurs and signals overflow if the adjusted exponent of a result - (from a conversion or from an operation that is not an attempt to divide - by zero), after rounding, would be greater than the largest value that - can be handled by the implementation (the value Emax). - - The result depends on the rounding mode: - - For round-half-up and round-half-even (and for round-half-down and - round-up, if implemented), the result of the operation is [sign,inf], - where sign is the sign of the intermediate result. 
For round-down, the - result is the largest finite number that can be represented in the - current precision, with the sign of the intermediate result. For - round-ceiling, the result is the same as for round-down if the sign of - the intermediate result is 1, or is [0,inf] otherwise. For round-floor, - the result is the same as for round-down if the sign of the intermediate - result is 0, or is [1,inf] otherwise. In all cases, Inexact and Rounded - will also be raised. - """ - - def handle(self, context, sign, *args): - if context.rounding in (ROUND_HALF_UP, ROUND_HALF_EVEN, - ROUND_HALF_DOWN, ROUND_UP): - return _SignedInfinity[sign] - if sign == 0: - if context.rounding == ROUND_CEILING: - return _SignedInfinity[sign] - return _dec_from_triple(sign, '9'*context.prec, - context.Emax-context.prec+1) - if sign == 1: - if context.rounding == ROUND_FLOOR: - return _SignedInfinity[sign] - return _dec_from_triple(sign, '9'*context.prec, - context.Emax-context.prec+1) - - -class Underflow(Inexact, Rounded, Subnormal): - """Numerical underflow with result rounded to 0. - - This occurs and signals underflow if a result is inexact and the - adjusted exponent of the result would be smaller (more negative) than - the smallest value that can be handled by the implementation (the value - Emin). That is, the result is both inexact and subnormal. - - The result after an underflow will be a subnormal number rounded, if - necessary, so that its exponent is not less than Etiny. This may result - in 0 with the sign of the intermediate result and an exponent of Etiny. - - In all cases, Inexact, Rounded, and Subnormal will also be raised. 
- """ - -# List of public traps and flags -_signals = [Clamped, DivisionByZero, Inexact, Overflow, Rounded, - Underflow, InvalidOperation, Subnormal] - -# Map conditions (per the spec) to signals -_condition_map = {ConversionSyntax:InvalidOperation, - DivisionImpossible:InvalidOperation, - DivisionUndefined:InvalidOperation, - InvalidContext:InvalidOperation} - -##### Context Functions ################################################## - -# The getcontext() and setcontext() function manage access to a thread-local -# current context. Py2.4 offers direct support for thread locals. If that -# is not available, use threading.currentThread() which is slower but will -# work for older Pythons. If threads are not part of the build, create a -# mock threading object with threading.local() returning the module namespace. - -try: - import threading -except ImportError: - # Python was compiled without threads; create a mock object instead - import sys - class MockThreading(object): - def local(self, sys=sys): - return sys.modules[__name__] - threading = MockThreading() - del sys, MockThreading - -try: - threading.local - -except AttributeError: - - # To fix reloading, force it to create a new context - # Old contexts have different exceptions in their dicts, making problems. - if hasattr(threading.currentThread(), '__decimal_context__'): - del threading.currentThread().__decimal_context__ - - def setcontext(context): - """Set this thread's context to context.""" - if context in (DefaultContext, BasicContext, ExtendedContext): - context = context.copy() - context.clear_flags() - threading.currentThread().__decimal_context__ = context - - def getcontext(): - """Returns this thread's context. - - If this thread does not yet have a context, returns - a new context and sets this thread's context. - New contexts are copies of DefaultContext. 
- """ - try: - return threading.currentThread().__decimal_context__ - except AttributeError: - context = Context() - threading.currentThread().__decimal_context__ = context - return context - -else: - - local = threading.local() - if hasattr(local, '__decimal_context__'): - del local.__decimal_context__ - - def getcontext(_local=local): - """Returns this thread's context. - - If this thread does not yet have a context, returns - a new context and sets this thread's context. - New contexts are copies of DefaultContext. - """ - try: - return _local.__decimal_context__ - except AttributeError: - context = Context() - _local.__decimal_context__ = context - return context - - def setcontext(context, _local=local): - """Set this thread's context to context.""" - if context in (DefaultContext, BasicContext, ExtendedContext): - context = context.copy() - context.clear_flags() - _local.__decimal_context__ = context - - del threading, local # Don't contaminate the namespace - -def localcontext(ctx=None): - """Return a context manager for a copy of the supplied context - - Uses a copy of the current context if no context is specified - The returned context manager creates a local decimal context - in a with statement: - def sin(x): - with localcontext() as ctx: - ctx.prec += 2 - # Rest of sin calculation algorithm - # uses a precision 2 greater than normal - return +s # Convert result to normal precision - - def sin(x): - with localcontext(ExtendedContext): - # Rest of sin calculation algorithm - # uses the Extended Context from the - # General Decimal Arithmetic Specification - return +s # Convert result to normal context - - >>> setcontext(DefaultContext) - >>> print getcontext().prec - 28 - >>> with localcontext(): - ... ctx = getcontext() - ... ctx.prec += 2 - ... print ctx.prec - ... - 30 - >>> with localcontext(ExtendedContext): - ... print getcontext().prec - ... 
- 9 - >>> print getcontext().prec - 28 - """ - if ctx is None: ctx = getcontext() - return _ContextManager(ctx) - - -##### Decimal class ####################################################### - -class Decimal(object): - """Floating point class for decimal arithmetic.""" - - __slots__ = ('_exp','_int','_sign', '_is_special') - # Generally, the value of the Decimal instance is given by - # (-1)**_sign * _int * 10**_exp - # Special values are signified by _is_special == True - - # We're immutable, so use __new__ not __init__ - def __new__(cls, value="0", context=None): - """Create a decimal point instance. - - >>> Decimal('3.14') # string input - Decimal('3.14') - >>> Decimal((0, (3, 1, 4), -2)) # tuple (sign, digit_tuple, exponent) - Decimal('3.14') - >>> Decimal(314) # int or long - Decimal('314') - >>> Decimal(Decimal(314)) # another decimal instance - Decimal('314') - >>> Decimal(' 3.14 \\n') # leading and trailing whitespace okay - Decimal('3.14') - """ - - # Note that the coefficient, self._int, is actually stored as - # a string rather than as a tuple of digits. This speeds up - # the "digits to integer" and "integer to digits" conversions - # that are used in almost every arithmetic operation on - # Decimals. This is an internal detail: the as_tuple function - # and the Decimal constructor still deal with tuples of - # digits. - - self = object.__new__(cls) - - # From a string - # REs insist on real strings, so we can too. 
- if isinstance(value, basestring): - m = _parser(value.strip()) - if m is None: - if context is None: - context = getcontext() - return context._raise_error(ConversionSyntax, - "Invalid literal for Decimal: %r" % value) - - if m.group('sign') == "-": - self._sign = 1 - else: - self._sign = 0 - intpart = m.group('int') - if intpart is not None: - # finite number - fracpart = m.group('frac') or '' - exp = int(m.group('exp') or '0') - self._int = str(int(intpart+fracpart)) - self._exp = exp - len(fracpart) - self._is_special = False - else: - diag = m.group('diag') - if diag is not None: - # NaN - self._int = str(int(diag or '0')).lstrip('0') - if m.group('signal'): - self._exp = 'N' - else: - self._exp = 'n' - else: - # infinity - self._int = '0' - self._exp = 'F' - self._is_special = True - return self - - # From an integer - if isinstance(value, (int,long)): - if value >= 0: - self._sign = 0 - else: - self._sign = 1 - self._exp = 0 - self._int = str(abs(value)) - self._is_special = False - return self - - # From another decimal - if isinstance(value, Decimal): - self._exp = value._exp - self._sign = value._sign - self._int = value._int - self._is_special = value._is_special - return self - - # From an internal working value - if isinstance(value, _WorkRep): - self._sign = value.sign - self._int = str(value.int) - self._exp = int(value.exp) - self._is_special = False - return self - - # tuple/list conversion (possibly from as_tuple()) - if isinstance(value, (list,tuple)): - if len(value) != 3: - raise ValueError('Invalid tuple size in creation of Decimal ' - 'from list or tuple. The list or tuple ' - 'should have exactly three elements.') - # process sign. The isinstance test rejects floats - if not (isinstance(value[0], (int, long)) and value[0] in (0,1)): - raise ValueError("Invalid sign. 
The first value in the tuple " - "should be an integer; either 0 for a " - "positive number or 1 for a negative number.") - self._sign = value[0] - if value[2] == 'F': - # infinity: value[1] is ignored - self._int = '0' - self._exp = value[2] - self._is_special = True - else: - # process and validate the digits in value[1] - digits = [] - for digit in value[1]: - if isinstance(digit, (int, long)) and 0 <= digit <= 9: - # skip leading zeros - if digits or digit != 0: - digits.append(digit) - else: - raise ValueError("The second value in the tuple must " - "be composed of integers in the range " - "0 through 9.") - if value[2] in ('n', 'N'): - # NaN: digits form the diagnostic - self._int = ''.join(map(str, digits)) - self._exp = value[2] - self._is_special = True - elif isinstance(value[2], (int, long)): - # finite number: digits give the coefficient - self._int = ''.join(map(str, digits or [0])) - self._exp = value[2] - self._is_special = False - else: - raise ValueError("The third value in the tuple must " - "be an integer, or one of the " - "strings 'F', 'n', 'N'.") - return self - - if isinstance(value, float): - value = Decimal.from_float(value) - self._exp = value._exp - self._sign = value._sign - self._int = value._int - self._is_special = value._is_special - return self - - raise TypeError("Cannot convert %r to Decimal" % value) - - # @classmethod, but @decorator is not valid Python 2.3 syntax, so - # don't use it (see notes on Py2.3 compatibility at top of file) - def from_float(cls, f): - """Converts a float to a decimal number, exactly. - - Note that Decimal.from_float(0.1) is not the same as Decimal('0.1'). - Since 0.1 is not exactly representable in binary floating point, the - value is stored as the nearest representable value which is - 0x1.999999999999ap-4. The exact equivalent of the value in decimal - is 0.1000000000000000055511151231257827021181583404541015625. 
- - >>> Decimal.from_float(0.1) - Decimal('0.1000000000000000055511151231257827021181583404541015625') - >>> Decimal.from_float(float('nan')) - Decimal('NaN') - >>> Decimal.from_float(float('inf')) - Decimal('Infinity') - >>> Decimal.from_float(-float('inf')) - Decimal('-Infinity') - >>> Decimal.from_float(-0.0) - Decimal('-0') - - """ - if isinstance(f, (int, long)): # handle integer inputs - return cls(f) - if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float - return cls(repr(f)) - if _math.copysign(1.0, f) == 1.0: - sign = 0 - else: - sign = 1 - n, d = abs(f).as_integer_ratio() - k = d.bit_length() - 1 - result = _dec_from_triple(sign, str(n*5**k), -k) - if cls is Decimal: - return result - else: - return cls(result) - from_float = classmethod(from_float) - - def _isnan(self): - """Returns whether the number is not actually one. - - 0 if a number - 1 if NaN - 2 if sNaN - """ - if self._is_special: - exp = self._exp - if exp == 'n': - return 1 - elif exp == 'N': - return 2 - return 0 - - def _isinfinity(self): - """Returns whether the number is infinite - - 0 if finite or not a number - 1 if +INF - -1 if -INF - """ - if self._exp == 'F': - if self._sign: - return -1 - return 1 - return 0 - - def _check_nans(self, other=None, context=None): - """Returns whether the number is not actually one. - - if self, other are sNaN, signal - if self, other are NaN return nan - return 0 - - Done before operations. 
- """ - - self_is_nan = self._isnan() - if other is None: - other_is_nan = False - else: - other_is_nan = other._isnan() - - if self_is_nan or other_is_nan: - if context is None: - context = getcontext() - - if self_is_nan == 2: - return context._raise_error(InvalidOperation, 'sNaN', - self) - if other_is_nan == 2: - return context._raise_error(InvalidOperation, 'sNaN', - other) - if self_is_nan: - return self._fix_nan(context) - - return other._fix_nan(context) - return 0 - - def _compare_check_nans(self, other, context): - """Version of _check_nans used for the signaling comparisons - compare_signal, __le__, __lt__, __ge__, __gt__. - - Signal InvalidOperation if either self or other is a (quiet - or signaling) NaN. Signaling NaNs take precedence over quiet - NaNs. - - Return 0 if neither operand is a NaN. - - """ - if context is None: - context = getcontext() - - if self._is_special or other._is_special: - if self.is_snan(): - return context._raise_error(InvalidOperation, - 'comparison involving sNaN', - self) - elif other.is_snan(): - return context._raise_error(InvalidOperation, - 'comparison involving sNaN', - other) - elif self.is_qnan(): - return context._raise_error(InvalidOperation, - 'comparison involving NaN', - self) - elif other.is_qnan(): - return context._raise_error(InvalidOperation, - 'comparison involving NaN', - other) - return 0 - - def __nonzero__(self): - """Return True if self is nonzero; otherwise return False. - - NaNs and infinities are considered nonzero. - """ - return self._is_special or self._int != '0' - - def _cmp(self, other): - """Compare the two non-NaN decimal instances self and other. - - Returns -1 if self < other, 0 if self == other and 1 - if self > other. 
This routine is for internal use only.""" - - if self._is_special or other._is_special: - self_inf = self._isinfinity() - other_inf = other._isinfinity() - if self_inf == other_inf: - return 0 - elif self_inf < other_inf: - return -1 - else: - return 1 - - # check for zeros; Decimal('0') == Decimal('-0') - if not self: - if not other: - return 0 - else: - return -((-1)**other._sign) - if not other: - return (-1)**self._sign - - # If different signs, neg one is less - if other._sign < self._sign: - return -1 - if self._sign < other._sign: - return 1 - - self_adjusted = self.adjusted() - other_adjusted = other.adjusted() - if self_adjusted == other_adjusted: - self_padded = self._int + '0'*(self._exp - other._exp) - other_padded = other._int + '0'*(other._exp - self._exp) - if self_padded == other_padded: - return 0 - elif self_padded < other_padded: - return -(-1)**self._sign - else: - return (-1)**self._sign - elif self_adjusted > other_adjusted: - return (-1)**self._sign - else: # self_adjusted < other_adjusted - return -((-1)**self._sign) - - # Note: The Decimal standard doesn't cover rich comparisons for - # Decimals. In particular, the specification is silent on the - # subject of what should happen for a comparison involving a NaN. - # We take the following approach: - # - # == comparisons involving a quiet NaN always return False - # != comparisons involving a quiet NaN always return True - # == or != comparisons involving a signaling NaN signal - # InvalidOperation, and return False or True as above if the - # InvalidOperation is not trapped. - # <, >, <= and >= comparisons involving a (quiet or signaling) - # NaN signal InvalidOperation, and return False if the - # InvalidOperation is not trapped. - # - # This behavior is designed to conform as closely as possible to - # that specified by IEEE 754. 
- - def __eq__(self, other, context=None): - other = _convert_other(other, allow_float=True) - if other is NotImplemented: - return other - if self._check_nans(other, context): - return False - return self._cmp(other) == 0 - - def __ne__(self, other, context=None): - other = _convert_other(other, allow_float=True) - if other is NotImplemented: - return other - if self._check_nans(other, context): - return True - return self._cmp(other) != 0 - - def __lt__(self, other, context=None): - other = _convert_other(other, allow_float=True) - if other is NotImplemented: - return other - ans = self._compare_check_nans(other, context) - if ans: - return False - return self._cmp(other) < 0 - - def __le__(self, other, context=None): - other = _convert_other(other, allow_float=True) - if other is NotImplemented: - return other - ans = self._compare_check_nans(other, context) - if ans: - return False - return self._cmp(other) <= 0 - - def __gt__(self, other, context=None): - other = _convert_other(other, allow_float=True) - if other is NotImplemented: - return other - ans = self._compare_check_nans(other, context) - if ans: - return False - return self._cmp(other) > 0 - - def __ge__(self, other, context=None): - other = _convert_other(other, allow_float=True) - if other is NotImplemented: - return other - ans = self._compare_check_nans(other, context) - if ans: - return False - return self._cmp(other) >= 0 - - def compare(self, other, context=None): - """Compares one to another. - - -1 => a < b - 0 => a = b - 1 => a > b - NaN => one is NaN - Like __cmp__, but returns Decimal instances. 
- """ - other = _convert_other(other, raiseit=True) - - # Compare(NaN, NaN) = NaN - if (self._is_special or other and other._is_special): - ans = self._check_nans(other, context) - if ans: - return ans - - return Decimal(self._cmp(other)) - - def __hash__(self): - """x.__hash__() <==> hash(x)""" - # Decimal integers must hash the same as the ints - # - # The hash of a nonspecial noninteger Decimal must depend only - # on the value of that Decimal, and not on its representation. - # For example: hash(Decimal('100E-1')) == hash(Decimal('10')). - - # Equality comparisons involving signaling nans can raise an - # exception; since equality checks are implicitly and - # unpredictably used when checking set and dict membership, we - # prevent signaling nans from being used as set elements or - # dict keys by making __hash__ raise an exception. - if self._is_special: - if self.is_snan(): - raise TypeError('Cannot hash a signaling NaN value.') - elif self.is_nan(): - # 0 to match hash(float('nan')) - return 0 - else: - # values chosen to match hash(float('inf')) and - # hash(float('-inf')). - if self._sign: - return -271828 - else: - return 314159 - - # In Python 2.7, we're allowing comparisons (but not - # arithmetic operations) between floats and Decimals; so if - # a Decimal instance is exactly representable as a float then - # its hash should match that of the float. - self_as_float = float(self) - if Decimal.from_float(self_as_float) == self: - return hash(self_as_float) - - if self._isinteger(): - op = _WorkRep(self.to_integral_value()) - # to make computation feasible for Decimals with large - # exponent, we use the fact that hash(n) == hash(m) for - # any two nonzero integers n and m such that (i) n and m - # have the same sign, and (ii) n is congruent to m modulo - # 2**64-1. So we can replace hash((-1)**s*c*10**e) with - # hash((-1)**s*c*pow(10, e, 2**64-1). 
- return hash((-1)**op.sign*op.int*pow(10, op.exp, 2**64-1)) - # The value of a nonzero nonspecial Decimal instance is - # faithfully represented by the triple consisting of its sign, - # its adjusted exponent, and its coefficient with trailing - # zeros removed. - return hash((self._sign, - self._exp+len(self._int), - self._int.rstrip('0'))) - - def as_tuple(self): - """Represents the number as a triple tuple. - - To show the internals exactly as they are. - """ - return DecimalTuple(self._sign, tuple(map(int, self._int)), self._exp) - - def __repr__(self): - """Represents the number as an instance of Decimal.""" - # Invariant: eval(repr(d)) == d - return "Decimal('%s')" % str(self) - - def __str__(self, eng=False, context=None): - """Return string representation of the number in scientific notation. - - Captures all of the information in the underlying representation. - """ - - sign = ['', '-'][self._sign] - if self._is_special: - if self._exp == 'F': - return sign + 'Infinity' - elif self._exp == 'n': - return sign + 'NaN' + self._int - else: # self._exp == 'N' - return sign + 'sNaN' + self._int - - # number of digits of self._int to left of decimal point - leftdigits = self._exp + len(self._int) - - # dotplace is number of digits of self._int to the left of the - # decimal point in the mantissa of the output string (that is, - # after adjusting the exponent) - if self._exp <= 0 and leftdigits > -6: - # no exponent required - dotplace = leftdigits - elif not eng: - # usual scientific notation: 1 digit on left of the point - dotplace = 1 - elif self._int == '0': - # engineering notation, zero - dotplace = (leftdigits + 1) % 3 - 1 - else: - # engineering notation, nonzero - dotplace = (leftdigits - 1) % 3 + 1 - - if dotplace <= 0: - intpart = '0' - fracpart = '.' + '0'*(-dotplace) + self._int - elif dotplace >= len(self._int): - intpart = self._int+'0'*(dotplace-len(self._int)) - fracpart = '' - else: - intpart = self._int[:dotplace] - fracpart = '.' 
+ self._int[dotplace:] - if leftdigits == dotplace: - exp = '' - else: - if context is None: - context = getcontext() - exp = ['e', 'E'][context.capitals] + "%+d" % (leftdigits-dotplace) - - return sign + intpart + fracpart + exp - - def to_eng_string(self, context=None): - """Convert to a string, using engineering notation if an exponent is needed. - - Engineering notation has an exponent which is a multiple of 3. This - can leave up to 3 digits to the left of the decimal place and may - require the addition of either one or two trailing zeros. - """ - return self.__str__(eng=True, context=context) - - def __neg__(self, context=None): - """Returns a copy with the sign switched. - - Rounds, if it has reason. - """ - if self._is_special: - ans = self._check_nans(context=context) - if ans: - return ans - - if context is None: - context = getcontext() - - if not self and context.rounding != ROUND_FLOOR: - # -Decimal('0') is Decimal('0'), not Decimal('-0'), except - # in ROUND_FLOOR rounding mode. - ans = self.copy_abs() - else: - ans = self.copy_negate() - - return ans._fix(context) - - def __pos__(self, context=None): - """Returns a copy, unless it is a sNaN. - - Rounds the number (if more than precision digits) - """ - if self._is_special: - ans = self._check_nans(context=context) - if ans: - return ans - - if context is None: - context = getcontext() - - if not self and context.rounding != ROUND_FLOOR: - # + (-0) = 0, except in ROUND_FLOOR rounding mode. - ans = self.copy_abs() - else: - ans = Decimal(self) - - return ans._fix(context) - - def __abs__(self, round=True, context=None): - """Returns the absolute value of self. - - If the keyword argument 'round' is false, do not round. The - expression self.__abs__(round=False) is equivalent to - self.copy_abs(). 
- """ - if not round: - return self.copy_abs() - - if self._is_special: - ans = self._check_nans(context=context) - if ans: - return ans - - if self._sign: - ans = self.__neg__(context=context) - else: - ans = self.__pos__(context=context) - - return ans - - def __add__(self, other, context=None): - """Returns self + other. - - -INF + INF (or the reverse) cause InvalidOperation errors. - """ - other = _convert_other(other) - if other is NotImplemented: - return other - - if context is None: - context = getcontext() - - if self._is_special or other._is_special: - ans = self._check_nans(other, context) - if ans: - return ans - - if self._isinfinity(): - # If both INF, same sign => same as both, opposite => error. - if self._sign != other._sign and other._isinfinity(): - return context._raise_error(InvalidOperation, '-INF + INF') - return Decimal(self) - if other._isinfinity(): - return Decimal(other) # Can't both be infinity here - - exp = min(self._exp, other._exp) - negativezero = 0 - if context.rounding == ROUND_FLOOR and self._sign != other._sign: - # If the answer is 0, the sign should be negative, in this case. 
- negativezero = 1 - - if not self and not other: - sign = min(self._sign, other._sign) - if negativezero: - sign = 1 - ans = _dec_from_triple(sign, '0', exp) - ans = ans._fix(context) - return ans - if not self: - exp = max(exp, other._exp - context.prec-1) - ans = other._rescale(exp, context.rounding) - ans = ans._fix(context) - return ans - if not other: - exp = max(exp, self._exp - context.prec-1) - ans = self._rescale(exp, context.rounding) - ans = ans._fix(context) - return ans - - op1 = _WorkRep(self) - op2 = _WorkRep(other) - op1, op2 = _normalize(op1, op2, context.prec) - - result = _WorkRep() - if op1.sign != op2.sign: - # Equal and opposite - if op1.int == op2.int: - ans = _dec_from_triple(negativezero, '0', exp) - ans = ans._fix(context) - return ans - if op1.int < op2.int: - op1, op2 = op2, op1 - # OK, now abs(op1) > abs(op2) - if op1.sign == 1: - result.sign = 1 - op1.sign, op2.sign = op2.sign, op1.sign - else: - result.sign = 0 - # So we know the sign, and op1 > 0. - elif op1.sign == 1: - result.sign = 1 - op1.sign, op2.sign = (0, 0) - else: - result.sign = 0 - # Now, op1 > abs(op2) > 0 - - if op2.sign == 0: - result.int = op1.int + op2.int - else: - result.int = op1.int - op2.int - - result.exp = op1.exp - ans = Decimal(result) - ans = ans._fix(context) - return ans - - __radd__ = __add__ - - def __sub__(self, other, context=None): - """Return self - other""" - other = _convert_other(other) - if other is NotImplemented: - return other - - if self._is_special or other._is_special: - ans = self._check_nans(other, context=context) - if ans: - return ans - - # self - other is computed as self + other.copy_negate() - return self.__add__(other.copy_negate(), context=context) - - def __rsub__(self, other, context=None): - """Return other - self""" - other = _convert_other(other) - if other is NotImplemented: - return other - - return other.__sub__(self, context=context) - - def __mul__(self, other, context=None): - """Return self * other. 
- - (+-) INF * 0 (or its reverse) raise InvalidOperation. - """ - other = _convert_other(other) - if other is NotImplemented: - return other - - if context is None: - context = getcontext() - - resultsign = self._sign ^ other._sign - - if self._is_special or other._is_special: - ans = self._check_nans(other, context) - if ans: - return ans - - if self._isinfinity(): - if not other: - return context._raise_error(InvalidOperation, '(+-)INF * 0') - return _SignedInfinity[resultsign] - - if other._isinfinity(): - if not self: - return context._raise_error(InvalidOperation, '0 * (+-)INF') - return _SignedInfinity[resultsign] - - resultexp = self._exp + other._exp - - # Special case for multiplying by zero - if not self or not other: - ans = _dec_from_triple(resultsign, '0', resultexp) - # Fixing in case the exponent is out of bounds - ans = ans._fix(context) - return ans - - # Special case for multiplying by power of 10 - if self._int == '1': - ans = _dec_from_triple(resultsign, other._int, resultexp) - ans = ans._fix(context) - return ans - if other._int == '1': - ans = _dec_from_triple(resultsign, self._int, resultexp) - ans = ans._fix(context) - return ans - - op1 = _WorkRep(self) - op2 = _WorkRep(other) - - ans = _dec_from_triple(resultsign, str(op1.int * op2.int), resultexp) - ans = ans._fix(context) - - return ans - __rmul__ = __mul__ - - def __truediv__(self, other, context=None): - """Return self / other.""" - other = _convert_other(other) - if other is NotImplemented: - return NotImplemented - - if context is None: - context = getcontext() - - sign = self._sign ^ other._sign - - if self._is_special or other._is_special: - ans = self._check_nans(other, context) - if ans: - return ans - - if self._isinfinity() and other._isinfinity(): - return context._raise_error(InvalidOperation, '(+-)INF/(+-)INF') - - if self._isinfinity(): - return _SignedInfinity[sign] - - if other._isinfinity(): - context._raise_error(Clamped, 'Division by infinity') - return 
_dec_from_triple(sign, '0', context.Etiny()) - - # Special cases for zeroes - if not other: - if not self: - return context._raise_error(DivisionUndefined, '0 / 0') - return context._raise_error(DivisionByZero, 'x / 0', sign) - - if not self: - exp = self._exp - other._exp - coeff = 0 - else: - # OK, so neither = 0, INF or NaN - shift = len(other._int) - len(self._int) + context.prec + 1 - exp = self._exp - other._exp - shift - op1 = _WorkRep(self) - op2 = _WorkRep(other) - if shift >= 0: - coeff, remainder = divmod(op1.int * 10**shift, op2.int) - else: - coeff, remainder = divmod(op1.int, op2.int * 10**-shift) - if remainder: - # result is not exact; adjust to ensure correct rounding - if coeff % 5 == 0: - coeff += 1 - else: - # result is exact; get as close to ideal exponent as possible - ideal_exp = self._exp - other._exp - while exp < ideal_exp and coeff % 10 == 0: - coeff //= 10 - exp += 1 - - ans = _dec_from_triple(sign, str(coeff), exp) - return ans._fix(context) - - def _divide(self, other, context): - """Return (self // other, self % other), to context.prec precision. - - Assumes that neither self nor other is a NaN, that self is not - infinite and that other is nonzero. 
- """ - sign = self._sign ^ other._sign - if other._isinfinity(): - ideal_exp = self._exp - else: - ideal_exp = min(self._exp, other._exp) - - expdiff = self.adjusted() - other.adjusted() - if not self or other._isinfinity() or expdiff <= -2: - return (_dec_from_triple(sign, '0', 0), - self._rescale(ideal_exp, context.rounding)) - if expdiff <= context.prec: - op1 = _WorkRep(self) - op2 = _WorkRep(other) - if op1.exp >= op2.exp: - op1.int *= 10**(op1.exp - op2.exp) - else: - op2.int *= 10**(op2.exp - op1.exp) - q, r = divmod(op1.int, op2.int) - if q < 10**context.prec: - return (_dec_from_triple(sign, str(q), 0), - _dec_from_triple(self._sign, str(r), ideal_exp)) - - # Here the quotient is too large to be representable - ans = context._raise_error(DivisionImpossible, - 'quotient too large in //, % or divmod') - return ans, ans - - def __rtruediv__(self, other, context=None): - """Swaps self/other and returns __truediv__.""" - other = _convert_other(other) - if other is NotImplemented: - return other - return other.__truediv__(self, context=context) - - __div__ = __truediv__ - __rdiv__ = __rtruediv__ - - def __divmod__(self, other, context=None): - """ - Return (self // other, self % other) - """ - other = _convert_other(other) - if other is NotImplemented: - return other - - if context is None: - context = getcontext() - - ans = self._check_nans(other, context) - if ans: - return (ans, ans) - - sign = self._sign ^ other._sign - if self._isinfinity(): - if other._isinfinity(): - ans = context._raise_error(InvalidOperation, 'divmod(INF, INF)') - return ans, ans - else: - return (_SignedInfinity[sign], - context._raise_error(InvalidOperation, 'INF % x')) - - if not other: - if not self: - ans = context._raise_error(DivisionUndefined, 'divmod(0, 0)') - return ans, ans - else: - return (context._raise_error(DivisionByZero, 'x // 0', sign), - context._raise_error(InvalidOperation, 'x % 0')) - - quotient, remainder = self._divide(other, context) - remainder = 
remainder._fix(context) - return quotient, remainder - - def __rdivmod__(self, other, context=None): - """Swaps self/other and returns __divmod__.""" - other = _convert_other(other) - if other is NotImplemented: - return other - return other.__divmod__(self, context=context) - - def __mod__(self, other, context=None): - """ - self % other - """ - other = _convert_other(other) - if other is NotImplemented: - return other - - if context is None: - context = getcontext() - - ans = self._check_nans(other, context) - if ans: - return ans - - if self._isinfinity(): - return context._raise_error(InvalidOperation, 'INF % x') - elif not other: - if self: - return context._raise_error(InvalidOperation, 'x % 0') - else: - return context._raise_error(DivisionUndefined, '0 % 0') - - remainder = self._divide(other, context)[1] - remainder = remainder._fix(context) - return remainder - - def __rmod__(self, other, context=None): - """Swaps self/other and returns __mod__.""" - other = _convert_other(other) - if other is NotImplemented: - return other - return other.__mod__(self, context=context) - - def remainder_near(self, other, context=None): - """ - Remainder nearest to 0- abs(remainder-near) <= other/2 - """ - if context is None: - context = getcontext() - - other = _convert_other(other, raiseit=True) - - ans = self._check_nans(other, context) - if ans: - return ans - - # self == +/-infinity -> InvalidOperation - if self._isinfinity(): - return context._raise_error(InvalidOperation, - 'remainder_near(infinity, x)') - - # other == 0 -> either InvalidOperation or DivisionUndefined - if not other: - if self: - return context._raise_error(InvalidOperation, - 'remainder_near(x, 0)') - else: - return context._raise_error(DivisionUndefined, - 'remainder_near(0, 0)') - - # other = +/-infinity -> remainder = self - if other._isinfinity(): - ans = Decimal(self) - return ans._fix(context) - - # self = 0 -> remainder = self, with ideal exponent - ideal_exponent = min(self._exp, 
other._exp) - if not self: - ans = _dec_from_triple(self._sign, '0', ideal_exponent) - return ans._fix(context) - - # catch most cases of large or small quotient - expdiff = self.adjusted() - other.adjusted() - if expdiff >= context.prec + 1: - # expdiff >= prec+1 => abs(self/other) > 10**prec - return context._raise_error(DivisionImpossible) - if expdiff <= -2: - # expdiff <= -2 => abs(self/other) < 0.1 - ans = self._rescale(ideal_exponent, context.rounding) - return ans._fix(context) - - # adjust both arguments to have the same exponent, then divide - op1 = _WorkRep(self) - op2 = _WorkRep(other) - if op1.exp >= op2.exp: - op1.int *= 10**(op1.exp - op2.exp) - else: - op2.int *= 10**(op2.exp - op1.exp) - q, r = divmod(op1.int, op2.int) - # remainder is r*10**ideal_exponent; other is +/-op2.int * - # 10**ideal_exponent. Apply correction to ensure that - # abs(remainder) <= abs(other)/2 - if 2*r + (q&1) > op2.int: - r -= op2.int - q += 1 - - if q >= 10**context.prec: - return context._raise_error(DivisionImpossible) - - # result has same sign as self unless r is negative - sign = self._sign - if r < 0: - sign = 1-sign - r = -r - - ans = _dec_from_triple(sign, str(r), ideal_exponent) - return ans._fix(context) - - def __floordiv__(self, other, context=None): - """self // other""" - other = _convert_other(other) - if other is NotImplemented: - return other - - if context is None: - context = getcontext() - - ans = self._check_nans(other, context) - if ans: - return ans - - if self._isinfinity(): - if other._isinfinity(): - return context._raise_error(InvalidOperation, 'INF // INF') - else: - return _SignedInfinity[self._sign ^ other._sign] - - if not other: - if self: - return context._raise_error(DivisionByZero, 'x // 0', - self._sign ^ other._sign) - else: - return context._raise_error(DivisionUndefined, '0 // 0') - - return self._divide(other, context)[0] - - def __rfloordiv__(self, other, context=None): - """Swaps self/other and returns __floordiv__.""" - other = 
_convert_other(other) - if other is NotImplemented: - return other - return other.__floordiv__(self, context=context) - - def __float__(self): - """Float representation.""" - if self._isnan(): - if self.is_snan(): - raise ValueError("Cannot convert signaling NaN to float") - s = "-nan" if self._sign else "nan" - else: - s = str(self) - return float(s) - - def __int__(self): - """Converts self to an int, truncating if necessary.""" - if self._is_special: - if self._isnan(): - raise ValueError("Cannot convert NaN to integer") - elif self._isinfinity(): - raise OverflowError("Cannot convert infinity to integer") - s = (-1)**self._sign - if self._exp >= 0: - return s*int(self._int)*10**self._exp - else: - return s*int(self._int[:self._exp] or '0') - - __trunc__ = __int__ - - def real(self): - return self - real = property(real) - - def imag(self): - return Decimal(0) - imag = property(imag) - - def conjugate(self): - return self - - def __complex__(self): - return complex(float(self)) - - def __long__(self): - """Converts to a long. - - Equivalent to long(int(self)) - """ - return long(self.__int__()) - - def _fix_nan(self, context): - """Decapitate the payload of a NaN to fit the context""" - payload = self._int - - # maximum length of payload is precision if _clamp=0, - # precision-1 if _clamp=1. - max_payload_len = context.prec - context._clamp - if len(payload) > max_payload_len: - payload = payload[len(payload)-max_payload_len:].lstrip('0') - return _dec_from_triple(self._sign, payload, self._exp, True) - return Decimal(self) - - def _fix(self, context): - """Round if it is necessary to keep self within prec precision. - - Rounds and fixes the exponent. Does not raise on a sNaN. - - Arguments: - self - Decimal instance - context - context used. 
- """ - - if self._is_special: - if self._isnan(): - # decapitate payload if necessary - return self._fix_nan(context) - else: - # self is +/-Infinity; return unaltered - return Decimal(self) - - # if self is zero then exponent should be between Etiny and - # Emax if _clamp==0, and between Etiny and Etop if _clamp==1. - Etiny = context.Etiny() - Etop = context.Etop() - if not self: - exp_max = [context.Emax, Etop][context._clamp] - new_exp = min(max(self._exp, Etiny), exp_max) - if new_exp != self._exp: - context._raise_error(Clamped) - return _dec_from_triple(self._sign, '0', new_exp) - else: - return Decimal(self) - - # exp_min is the smallest allowable exponent of the result, - # equal to max(self.adjusted()-context.prec+1, Etiny) - exp_min = len(self._int) + self._exp - context.prec - if exp_min > Etop: - # overflow: exp_min > Etop iff self.adjusted() > Emax - ans = context._raise_error(Overflow, 'above Emax', self._sign) - context._raise_error(Inexact) - context._raise_error(Rounded) - return ans - - self_is_subnormal = exp_min < Etiny - if self_is_subnormal: - exp_min = Etiny - - # round if self has too many digits - if self._exp < exp_min: - digits = len(self._int) + self._exp - exp_min - if digits < 0: - self = _dec_from_triple(self._sign, '1', exp_min-1) - digits = 0 - rounding_method = self._pick_rounding_function[context.rounding] - changed = rounding_method(self, digits) - coeff = self._int[:digits] or '0' - if changed > 0: - coeff = str(int(coeff)+1) - if len(coeff) > context.prec: - coeff = coeff[:-1] - exp_min += 1 - - # check whether the rounding pushed the exponent out of range - if exp_min > Etop: - ans = context._raise_error(Overflow, 'above Emax', self._sign) - else: - ans = _dec_from_triple(self._sign, coeff, exp_min) - - # raise the appropriate signals, taking care to respect - # the precedence described in the specification - if changed and self_is_subnormal: - context._raise_error(Underflow) - if self_is_subnormal: - 
context._raise_error(Subnormal) - if changed: - context._raise_error(Inexact) - context._raise_error(Rounded) - if not ans: - # raise Clamped on underflow to 0 - context._raise_error(Clamped) - return ans - - if self_is_subnormal: - context._raise_error(Subnormal) - - # fold down if _clamp == 1 and self has too few digits - if context._clamp == 1 and self._exp > Etop: - context._raise_error(Clamped) - self_padded = self._int + '0'*(self._exp - Etop) - return _dec_from_triple(self._sign, self_padded, Etop) - - # here self was representable to begin with; return unchanged - return Decimal(self) - - # for each of the rounding functions below: - # self is a finite, nonzero Decimal - # prec is an integer satisfying 0 <= prec < len(self._int) - # - # each function returns either -1, 0, or 1, as follows: - # 1 indicates that self should be rounded up (away from zero) - # 0 indicates that self should be truncated, and that all the - # digits to be truncated are zeros (so the value is unchanged) - # -1 indicates that there are nonzero digits to be truncated - - def _round_down(self, prec): - """Also known as round-towards-0, truncate.""" - if _all_zeros(self._int, prec): - return 0 - else: - return -1 - - def _round_up(self, prec): - """Rounds away from 0.""" - return -self._round_down(prec) - - def _round_half_up(self, prec): - """Rounds 5 up (away from 0)""" - if self._int[prec] in '56789': - return 1 - elif _all_zeros(self._int, prec): - return 0 - else: - return -1 - - def _round_half_down(self, prec): - """Round 5 down""" - if _exact_half(self._int, prec): - return -1 - else: - return self._round_half_up(prec) - - def _round_half_even(self, prec): - """Round 5 to even, rest to nearest.""" - if _exact_half(self._int, prec) and \ - (prec == 0 or self._int[prec-1] in '02468'): - return -1 - else: - return self._round_half_up(prec) - - def _round_ceiling(self, prec): - """Rounds up (not away from 0 if negative.)""" - if self._sign: - return self._round_down(prec) - else: - 
return -self._round_down(prec) - - def _round_floor(self, prec): - """Rounds down (not towards 0 if negative)""" - if not self._sign: - return self._round_down(prec) - else: - return -self._round_down(prec) - - def _round_05up(self, prec): - """Round down unless digit prec-1 is 0 or 5.""" - if prec and self._int[prec-1] not in '05': - return self._round_down(prec) - else: - return -self._round_down(prec) - - _pick_rounding_function = dict( - ROUND_DOWN = _round_down, - ROUND_UP = _round_up, - ROUND_HALF_UP = _round_half_up, - ROUND_HALF_DOWN = _round_half_down, - ROUND_HALF_EVEN = _round_half_even, - ROUND_CEILING = _round_ceiling, - ROUND_FLOOR = _round_floor, - ROUND_05UP = _round_05up, - ) - - def fma(self, other, third, context=None): - """Fused multiply-add. - - Returns self*other+third with no rounding of the intermediate - product self*other. - - self and other are multiplied together, with no rounding of - the result. The third operand is then added to the result, - and a single final rounding is performed. - """ - - other = _convert_other(other, raiseit=True) - - # compute product; raise InvalidOperation if either operand is - # a signaling NaN or if the product is zero times infinity. 
- if self._is_special or other._is_special: - if context is None: - context = getcontext() - if self._exp == 'N': - return context._raise_error(InvalidOperation, 'sNaN', self) - if other._exp == 'N': - return context._raise_error(InvalidOperation, 'sNaN', other) - if self._exp == 'n': - product = self - elif other._exp == 'n': - product = other - elif self._exp == 'F': - if not other: - return context._raise_error(InvalidOperation, - 'INF * 0 in fma') - product = _SignedInfinity[self._sign ^ other._sign] - elif other._exp == 'F': - if not self: - return context._raise_error(InvalidOperation, - '0 * INF in fma') - product = _SignedInfinity[self._sign ^ other._sign] - else: - product = _dec_from_triple(self._sign ^ other._sign, - str(int(self._int) * int(other._int)), - self._exp + other._exp) - - third = _convert_other(third, raiseit=True) - return product.__add__(third, context) - - def _power_modulo(self, other, modulo, context=None): - """Three argument version of __pow__""" - - # if can't convert other and modulo to Decimal, raise - # TypeError; there's no point returning NotImplemented (no - # equivalent of __rpow__ for three argument pow) - other = _convert_other(other, raiseit=True) - modulo = _convert_other(modulo, raiseit=True) - - if context is None: - context = getcontext() - - # deal with NaNs: if there are any sNaNs then first one wins, - # (i.e. 
behaviour for NaNs is identical to that of fma) - self_is_nan = self._isnan() - other_is_nan = other._isnan() - modulo_is_nan = modulo._isnan() - if self_is_nan or other_is_nan or modulo_is_nan: - if self_is_nan == 2: - return context._raise_error(InvalidOperation, 'sNaN', - self) - if other_is_nan == 2: - return context._raise_error(InvalidOperation, 'sNaN', - other) - if modulo_is_nan == 2: - return context._raise_error(InvalidOperation, 'sNaN', - modulo) - if self_is_nan: - return self._fix_nan(context) - if other_is_nan: - return other._fix_nan(context) - return modulo._fix_nan(context) - - # check inputs: we apply same restrictions as Python's pow() - if not (self._isinteger() and - other._isinteger() and - modulo._isinteger()): - return context._raise_error(InvalidOperation, - 'pow() 3rd argument not allowed ' - 'unless all arguments are integers') - if other < 0: - return context._raise_error(InvalidOperation, - 'pow() 2nd argument cannot be ' - 'negative when 3rd argument specified') - if not modulo: - return context._raise_error(InvalidOperation, - 'pow() 3rd argument cannot be 0') - - # additional restriction for decimal: the modulus must be less - # than 10**prec in absolute value - if modulo.adjusted() >= context.prec: - return context._raise_error(InvalidOperation, - 'insufficient precision: pow() 3rd ' - 'argument must not have more than ' - 'precision digits') - - # define 0**0 == NaN, for consistency with two-argument pow - # (even though it hurts!) - if not other and not self: - return context._raise_error(InvalidOperation, - 'at least one of pow() 1st argument ' - 'and 2nd argument must be nonzero ;' - '0**0 is not defined') - - # compute sign of result - if other._iseven(): - sign = 0 - else: - sign = self._sign - - # convert modulo to a Python integer, and self and other to - # Decimal integers (i.e. 
force their exponents to be >= 0) - modulo = abs(int(modulo)) - base = _WorkRep(self.to_integral_value()) - exponent = _WorkRep(other.to_integral_value()) - - # compute result using integer pow() - base = (base.int % modulo * pow(10, base.exp, modulo)) % modulo - for i in xrange(exponent.exp): - base = pow(base, 10, modulo) - base = pow(base, exponent.int, modulo) - - return _dec_from_triple(sign, str(base), 0) - - def _power_exact(self, other, p): - """Attempt to compute self**other exactly. - - Given Decimals self and other and an integer p, attempt to - compute an exact result for the power self**other, with p - digits of precision. Return None if self**other is not - exactly representable in p digits. - - Assumes that elimination of special cases has already been - performed: self and other must both be nonspecial; self must - be positive and not numerically equal to 1; other must be - nonzero. For efficiency, other._exp should not be too large, - so that 10**abs(other._exp) is a feasible calculation.""" - - # In the comments below, we write x for the value of self and y for the - # value of other. Write x = xc*10**xe and abs(y) = yc*10**ye, with xc - # and yc positive integers not divisible by 10. - - # The main purpose of this method is to identify the *failure* - # of x**y to be exactly representable with as little effort as - # possible. So we look for cheap and easy tests that - # eliminate the possibility of x**y being exact. Only if all - # these tests are passed do we go on to actually compute x**y. - - # Here's the main idea. Express y as a rational number m/n, with m and - # n relatively prime and n>0. Then for x**y to be exactly - # representable (at *any* precision), xc must be the nth power of a - # positive integer and xe must be divisible by n. If y is negative - # then additionally xc must be a power of either 2 or 5, hence a power - # of 2**n or 5**n. 
- # - # There's a limit to how small |y| can be: if y=m/n as above - # then: - # - # (1) if xc != 1 then for the result to be representable we - # need xc**(1/n) >= 2, and hence also xc**|y| >= 2. So - # if |y| <= 1/nbits(xc) then xc < 2**nbits(xc) <= - # 2**(1/|y|), hence xc**|y| < 2 and the result is not - # representable. - # - # (2) if xe != 0, |xe|*(1/n) >= 1, so |xe|*|y| >= 1. Hence if - # |y| < 1/|xe| then the result is not representable. - # - # Note that since x is not equal to 1, at least one of (1) and - # (2) must apply. Now |y| < 1/nbits(xc) iff |yc|*nbits(xc) < - # 10**-ye iff len(str(|yc|*nbits(xc)) <= -ye. - # - # There's also a limit to how large y can be, at least if it's - # positive: the normalized result will have coefficient xc**y, - # so if it's representable then xc**y < 10**p, and y < - # p/log10(xc). Hence if y*log10(xc) >= p then the result is - # not exactly representable. - - # if len(str(abs(yc*xe)) <= -ye then abs(yc*xe) < 10**-ye, - # so |y| < 1/xe and the result is not representable. - # Similarly, len(str(abs(yc)*xc_bits)) <= -ye implies |y| - # < 1/nbits(xc). - - x = _WorkRep(self) - xc, xe = x.int, x.exp - while xc % 10 == 0: - xc //= 10 - xe += 1 - - y = _WorkRep(other) - yc, ye = y.int, y.exp - while yc % 10 == 0: - yc //= 10 - ye += 1 - - # case where xc == 1: result is 10**(xe*y), with xe*y - # required to be an integer - if xc == 1: - xe *= yc - # result is now 10**(xe * 10**ye); xe * 10**ye must be integral - while xe % 10 == 0: - xe //= 10 - ye += 1 - if ye < 0: - return None - exponent = xe * 10**ye - if y.sign == 1: - exponent = -exponent - # if other is a nonnegative integer, use ideal exponent - if other._isinteger() and other._sign == 0: - ideal_exponent = self._exp*int(other) - zeros = min(exponent-ideal_exponent, p-1) - else: - zeros = 0 - return _dec_from_triple(0, '1' + '0'*zeros, exponent-zeros) - - # case where y is negative: xc must be either a power - # of 2 or a power of 5. 
- if y.sign == 1: - last_digit = xc % 10 - if last_digit in (2,4,6,8): - # quick test for power of 2 - if xc & -xc != xc: - return None - # now xc is a power of 2; e is its exponent - e = _nbits(xc)-1 - - # We now have: - # - # x = 2**e * 10**xe, e > 0, and y < 0. - # - # The exact result is: - # - # x**y = 5**(-e*y) * 10**(e*y + xe*y) - # - # provided that both e*y and xe*y are integers. Note that if - # 5**(-e*y) >= 10**p, then the result can't be expressed - # exactly with p digits of precision. - # - # Using the above, we can guard against large values of ye. - # 93/65 is an upper bound for log(10)/log(5), so if - # - # ye >= len(str(93*p//65)) - # - # then - # - # -e*y >= -y >= 10**ye > 93*p/65 > p*log(10)/log(5), - # - # so 5**(-e*y) >= 10**p, and the coefficient of the result - # can't be expressed in p digits. - - # emax >= largest e such that 5**e < 10**p. - emax = p*93//65 - if ye >= len(str(emax)): - return None - - # Find -e*y and -xe*y; both must be integers - e = _decimal_lshift_exact(e * yc, ye) - xe = _decimal_lshift_exact(xe * yc, ye) - if e is None or xe is None: - return None - - if e > emax: - return None - xc = 5**e - - elif last_digit == 5: - # e >= log_5(xc) if xc is a power of 5; we have - # equality all the way up to xc=5**2658 - e = _nbits(xc)*28//65 - xc, remainder = divmod(5**e, xc) - if remainder: - return None - while xc % 5 == 0: - xc //= 5 - e -= 1 - - # Guard against large values of ye, using the same logic as in - # the 'xc is a power of 2' branch. 10/3 is an upper bound for - # log(10)/log(2). 
- emax = p*10//3 - if ye >= len(str(emax)): - return None - - e = _decimal_lshift_exact(e * yc, ye) - xe = _decimal_lshift_exact(xe * yc, ye) - if e is None or xe is None: - return None - - if e > emax: - return None - xc = 2**e - else: - return None - - if xc >= 10**p: - return None - xe = -e-xe - return _dec_from_triple(0, str(xc), xe) - - # now y is positive; find m and n such that y = m/n - if ye >= 0: - m, n = yc*10**ye, 1 - else: - if xe != 0 and len(str(abs(yc*xe))) <= -ye: - return None - xc_bits = _nbits(xc) - if xc != 1 and len(str(abs(yc)*xc_bits)) <= -ye: - return None - m, n = yc, 10**(-ye) - while m % 2 == n % 2 == 0: - m //= 2 - n //= 2 - while m % 5 == n % 5 == 0: - m //= 5 - n //= 5 - - # compute nth root of xc*10**xe - if n > 1: - # if 1 < xc < 2**n then xc isn't an nth power - if xc != 1 and xc_bits <= n: - return None - - xe, rem = divmod(xe, n) - if rem != 0: - return None - - # compute nth root of xc using Newton's method - a = 1L << -(-_nbits(xc)//n) # initial estimate - while True: - q, r = divmod(xc, a**(n-1)) - if a <= q: - break - else: - a = (a*(n-1) + q)//n - if not (a == q and r == 0): - return None - xc = a - - # now xc*10**xe is the nth root of the original xc*10**xe - # compute mth power of xc*10**xe - - # if m > p*100//_log10_lb(xc) then m > p/log10(xc), hence xc**m > - # 10**p and the result is not representable. - if xc > 1 and m > p*100//_log10_lb(xc): - return None - xc = xc**m - xe *= m - if xc > 10**p: - return None - - # by this point the result *is* exactly representable - # adjust the exponent to get as close as possible to the ideal - # exponent, if necessary - str_xc = str(xc) - if other._isinteger() and other._sign == 0: - ideal_exponent = self._exp*int(other) - zeros = min(xe-ideal_exponent, p-len(str_xc)) - else: - zeros = 0 - return _dec_from_triple(0, str_xc+'0'*zeros, xe-zeros) - - def __pow__(self, other, modulo=None, context=None): - """Return self ** other [ % modulo]. 
- - With two arguments, compute self**other. - - With three arguments, compute (self**other) % modulo. For the - three argument form, the following restrictions on the - arguments hold: - - - all three arguments must be integral - - other must be nonnegative - - either self or other (or both) must be nonzero - - modulo must be nonzero and must have at most p digits, - where p is the context precision. - - If any of these restrictions is violated the InvalidOperation - flag is raised. - - The result of pow(self, other, modulo) is identical to the - result that would be obtained by computing (self**other) % - modulo with unbounded precision, but is computed more - efficiently. It is always exact. - """ - - if modulo is not None: - return self._power_modulo(other, modulo, context) - - other = _convert_other(other) - if other is NotImplemented: - return other - - if context is None: - context = getcontext() - - # either argument is a NaN => result is NaN - ans = self._check_nans(other, context) - if ans: - return ans - - # 0**0 = NaN (!), x**0 = 1 for nonzero x (including +/-Infinity) - if not other: - if not self: - return context._raise_error(InvalidOperation, '0 ** 0') - else: - return _One - - # result has sign 1 iff self._sign is 1 and other is an odd integer - result_sign = 0 - if self._sign == 1: - if other._isinteger(): - if not other._iseven(): - result_sign = 1 - else: - # -ve**noninteger = NaN - # (-0)**noninteger = 0**noninteger - if self: - return context._raise_error(InvalidOperation, - 'x ** y with x negative and y not an integer') - # negate self, without doing any unwanted rounding - self = self.copy_negate() - - # 0**(+ve or Inf)= 0; 0**(-ve or -Inf) = Infinity - if not self: - if other._sign == 0: - return _dec_from_triple(result_sign, '0', 0) - else: - return _SignedInfinity[result_sign] - - # Inf**(+ve or Inf) = Inf; Inf**(-ve or -Inf) = 0 - if self._isinfinity(): - if other._sign == 0: - return _SignedInfinity[result_sign] - else: - return 
_dec_from_triple(result_sign, '0', 0) - - # 1**other = 1, but the choice of exponent and the flags - # depend on the exponent of self, and on whether other is a - # positive integer, a negative integer, or neither - if self == _One: - if other._isinteger(): - # exp = max(self._exp*max(int(other), 0), - # 1-context.prec) but evaluating int(other) directly - # is dangerous until we know other is small (other - # could be 1e999999999) - if other._sign == 1: - multiplier = 0 - elif other > context.prec: - multiplier = context.prec - else: - multiplier = int(other) - - exp = self._exp * multiplier - if exp < 1-context.prec: - exp = 1-context.prec - context._raise_error(Rounded) - else: - context._raise_error(Inexact) - context._raise_error(Rounded) - exp = 1-context.prec - - return _dec_from_triple(result_sign, '1'+'0'*-exp, exp) - - # compute adjusted exponent of self - self_adj = self.adjusted() - - # self ** infinity is infinity if self > 1, 0 if self < 1 - # self ** -infinity is infinity if self < 1, 0 if self > 1 - if other._isinfinity(): - if (other._sign == 0) == (self_adj < 0): - return _dec_from_triple(result_sign, '0', 0) - else: - return _SignedInfinity[result_sign] - - # from here on, the result always goes through the call - # to _fix at the end of this function. - ans = None - exact = False - - # crude test to catch cases of extreme overflow/underflow. If - # log10(self)*other >= 10**bound and bound >= len(str(Emax)) - # then 10**bound >= 10**len(str(Emax)) >= Emax+1 and hence - # self**other >= 10**(Emax+1), so overflow occurs. The test - # for underflow is similar. 
- bound = self._log10_exp_bound() + other.adjusted() - if (self_adj >= 0) == (other._sign == 0): - # self > 1 and other +ve, or self < 1 and other -ve - # possibility of overflow - if bound >= len(str(context.Emax)): - ans = _dec_from_triple(result_sign, '1', context.Emax+1) - else: - # self > 1 and other -ve, or self < 1 and other +ve - # possibility of underflow to 0 - Etiny = context.Etiny() - if bound >= len(str(-Etiny)): - ans = _dec_from_triple(result_sign, '1', Etiny-1) - - # try for an exact result with precision +1 - if ans is None: - ans = self._power_exact(other, context.prec + 1) - if ans is not None: - if result_sign == 1: - ans = _dec_from_triple(1, ans._int, ans._exp) - exact = True - - # usual case: inexact result, x**y computed directly as exp(y*log(x)) - if ans is None: - p = context.prec - x = _WorkRep(self) - xc, xe = x.int, x.exp - y = _WorkRep(other) - yc, ye = y.int, y.exp - if y.sign == 1: - yc = -yc - - # compute correctly rounded result: start with precision +3, - # then increase precision until result is unambiguously roundable - extra = 3 - while True: - coeff, exp = _dpower(xc, xe, yc, ye, p+extra) - if coeff % (5*10**(len(str(coeff))-p-1)): - break - extra += 3 - - ans = _dec_from_triple(result_sign, str(coeff), exp) - - # unlike exp, ln and log10, the power function respects the - # rounding mode; no need to switch to ROUND_HALF_EVEN here - - # There's a difficulty here when 'other' is not an integer and - # the result is exact. In this case, the specification - # requires that the Inexact flag be raised (in spite of - # exactness), but since the result is exact _fix won't do this - # for us. (Correspondingly, the Underflow signal should also - # be raised for subnormal results.) We can't directly raise - # these signals either before or after calling _fix, since - # that would violate the precedence for signals. So we wrap - # the ._fix call in a temporary context, and reraise - # afterwards. 
- if exact and not other._isinteger(): - # pad with zeros up to length context.prec+1 if necessary; this - # ensures that the Rounded signal will be raised. - if len(ans._int) <= context.prec: - expdiff = context.prec + 1 - len(ans._int) - ans = _dec_from_triple(ans._sign, ans._int+'0'*expdiff, - ans._exp-expdiff) - - # create a copy of the current context, with cleared flags/traps - newcontext = context.copy() - newcontext.clear_flags() - for exception in _signals: - newcontext.traps[exception] = 0 - - # round in the new context - ans = ans._fix(newcontext) - - # raise Inexact, and if necessary, Underflow - newcontext._raise_error(Inexact) - if newcontext.flags[Subnormal]: - newcontext._raise_error(Underflow) - - # propagate signals to the original context; _fix could - # have raised any of Overflow, Underflow, Subnormal, - # Inexact, Rounded, Clamped. Overflow needs the correct - # arguments. Note that the order of the exceptions is - # important here. - if newcontext.flags[Overflow]: - context._raise_error(Overflow, 'above Emax', ans._sign) - for exception in Underflow, Subnormal, Inexact, Rounded, Clamped: - if newcontext.flags[exception]: - context._raise_error(exception) - - else: - ans = ans._fix(context) - - return ans - - def __rpow__(self, other, context=None): - """Swaps self/other and returns __pow__.""" - other = _convert_other(other) - if other is NotImplemented: - return other - return other.__pow__(self, context=context) - - def normalize(self, context=None): - """Normalize- strip trailing 0s, change anything equal to 0 to 0e0""" - - if context is None: - context = getcontext() - - if self._is_special: - ans = self._check_nans(context=context) - if ans: - return ans - - dup = self._fix(context) - if dup._isinfinity(): - return dup - - if not dup: - return _dec_from_triple(dup._sign, '0', 0) - exp_max = [context.Emax, context.Etop()][context._clamp] - end = len(dup._int) - exp = dup._exp - while dup._int[end-1] == '0' and exp < exp_max: - exp += 1 - 
end -= 1 - return _dec_from_triple(dup._sign, dup._int[:end], exp) - - def quantize(self, exp, rounding=None, context=None, watchexp=True): - """Quantize self so its exponent is the same as that of exp. - - Similar to self._rescale(exp._exp) but with error checking. - """ - exp = _convert_other(exp, raiseit=True) - - if context is None: - context = getcontext() - if rounding is None: - rounding = context.rounding - - if self._is_special or exp._is_special: - ans = self._check_nans(exp, context) - if ans: - return ans - - if exp._isinfinity() or self._isinfinity(): - if exp._isinfinity() and self._isinfinity(): - return Decimal(self) # if both are inf, it is OK - return context._raise_error(InvalidOperation, - 'quantize with one INF') - - # if we're not watching exponents, do a simple rescale - if not watchexp: - ans = self._rescale(exp._exp, rounding) - # raise Inexact and Rounded where appropriate - if ans._exp > self._exp: - context._raise_error(Rounded) - if ans != self: - context._raise_error(Inexact) - return ans - - # exp._exp should be between Etiny and Emax - if not (context.Etiny() <= exp._exp <= context.Emax): - return context._raise_error(InvalidOperation, - 'target exponent out of bounds in quantize') - - if not self: - ans = _dec_from_triple(self._sign, '0', exp._exp) - return ans._fix(context) - - self_adjusted = self.adjusted() - if self_adjusted > context.Emax: - return context._raise_error(InvalidOperation, - 'exponent of quantize result too large for current context') - if self_adjusted - exp._exp + 1 > context.prec: - return context._raise_error(InvalidOperation, - 'quantize result has too many digits for current context') - - ans = self._rescale(exp._exp, rounding) - if ans.adjusted() > context.Emax: - return context._raise_error(InvalidOperation, - 'exponent of quantize result too large for current context') - if len(ans._int) > context.prec: - return context._raise_error(InvalidOperation, - 'quantize result has too many digits for current 
context') - - # raise appropriate flags - if ans and ans.adjusted() < context.Emin: - context._raise_error(Subnormal) - if ans._exp > self._exp: - if ans != self: - context._raise_error(Inexact) - context._raise_error(Rounded) - - # call to fix takes care of any necessary folddown, and - # signals Clamped if necessary - ans = ans._fix(context) - return ans - - def same_quantum(self, other): - """Return True if self and other have the same exponent; otherwise - return False. - - If either operand is a special value, the following rules are used: - * return True if both operands are infinities - * return True if both operands are NaNs - * otherwise, return False. - """ - other = _convert_other(other, raiseit=True) - if self._is_special or other._is_special: - return (self.is_nan() and other.is_nan() or - self.is_infinite() and other.is_infinite()) - return self._exp == other._exp - - def _rescale(self, exp, rounding): - """Rescale self so that the exponent is exp, either by padding with zeros - or by truncating digits, using the given rounding mode. - - Specials are returned without change. This operation is - quiet: it raises no flags, and uses no information from the - context. - - exp = exp to scale to (an integer) - rounding = rounding mode - """ - if self._is_special: - return Decimal(self) - if not self: - return _dec_from_triple(self._sign, '0', exp) - - if self._exp >= exp: - # pad answer with zeros if necessary - return _dec_from_triple(self._sign, - self._int + '0'*(self._exp - exp), exp) - - # too many digits; round and lose data. 
If self.adjusted() < - # exp-1, replace self by 10**(exp-1) before rounding - digits = len(self._int) + self._exp - exp - if digits < 0: - self = _dec_from_triple(self._sign, '1', exp-1) - digits = 0 - this_function = self._pick_rounding_function[rounding] - changed = this_function(self, digits) - coeff = self._int[:digits] or '0' - if changed == 1: - coeff = str(int(coeff)+1) - return _dec_from_triple(self._sign, coeff, exp) - - def _round(self, places, rounding): - """Round a nonzero, nonspecial Decimal to a fixed number of - significant figures, using the given rounding mode. - - Infinities, NaNs and zeros are returned unaltered. - - This operation is quiet: it raises no flags, and uses no - information from the context. - - """ - if places <= 0: - raise ValueError("argument should be at least 1 in _round") - if self._is_special or not self: - return Decimal(self) - ans = self._rescale(self.adjusted()+1-places, rounding) - # it can happen that the rescale alters the adjusted exponent; - # for example when rounding 99.97 to 3 significant figures. - # When this happens we end up with an extra 0 at the end of - # the number; a second rescale fixes this. - if ans.adjusted() != self.adjusted(): - ans = ans._rescale(ans.adjusted()+1-places, rounding) - return ans - - def to_integral_exact(self, rounding=None, context=None): - """Rounds to a nearby integer. - - If no rounding mode is specified, take the rounding mode from - the context. This method raises the Rounded and Inexact flags - when appropriate. - - See also: to_integral_value, which does exactly the same as - this method except that it doesn't raise Inexact or Rounded. 
- """ - if self._is_special: - ans = self._check_nans(context=context) - if ans: - return ans - return Decimal(self) - if self._exp >= 0: - return Decimal(self) - if not self: - return _dec_from_triple(self._sign, '0', 0) - if context is None: - context = getcontext() - if rounding is None: - rounding = context.rounding - ans = self._rescale(0, rounding) - if ans != self: - context._raise_error(Inexact) - context._raise_error(Rounded) - return ans - - def to_integral_value(self, rounding=None, context=None): - """Rounds to the nearest integer, without raising inexact, rounded.""" - if context is None: - context = getcontext() - if rounding is None: - rounding = context.rounding - if self._is_special: - ans = self._check_nans(context=context) - if ans: - return ans - return Decimal(self) - if self._exp >= 0: - return Decimal(self) - else: - return self._rescale(0, rounding) - - # the method name changed, but we provide also the old one, for compatibility - to_integral = to_integral_value - - def sqrt(self, context=None): - """Return the square root of self.""" - if context is None: - context = getcontext() - - if self._is_special: - ans = self._check_nans(context=context) - if ans: - return ans - - if self._isinfinity() and self._sign == 0: - return Decimal(self) - - if not self: - # exponent = self._exp // 2. sqrt(-0) = -0 - ans = _dec_from_triple(self._sign, '0', self._exp // 2) - return ans._fix(context) - - if self._sign == 1: - return context._raise_error(InvalidOperation, 'sqrt(-x), x > 0') - - # At this point self represents a positive number. Let p be - # the desired precision and express self in the form c*100**e - # with c a positive real number and e an integer, c and e - # being chosen so that 100**(p-1) <= c < 100**p. 
Then the - # (exact) square root of self is sqrt(c)*10**e, and 10**(p-1) - # <= sqrt(c) < 10**p, so the closest representable Decimal at - # precision p is n*10**e where n = round_half_even(sqrt(c)), - # the closest integer to sqrt(c) with the even integer chosen - # in the case of a tie. - # - # To ensure correct rounding in all cases, we use the - # following trick: we compute the square root to an extra - # place (precision p+1 instead of precision p), rounding down. - # Then, if the result is inexact and its last digit is 0 or 5, - # we increase the last digit to 1 or 6 respectively; if it's - # exact we leave the last digit alone. Now the final round to - # p places (or fewer in the case of underflow) will round - # correctly and raise the appropriate flags. - - # use an extra digit of precision - prec = context.prec+1 - - # write argument in the form c*100**e where e = self._exp//2 - # is the 'ideal' exponent, to be used if the square root is - # exactly representable. l is the number of 'digits' of c in - # base 100, so that 100**(l-1) <= c < 100**l. 
- op = _WorkRep(self) - e = op.exp >> 1 - if op.exp & 1: - c = op.int * 10 - l = (len(self._int) >> 1) + 1 - else: - c = op.int - l = len(self._int)+1 >> 1 - - # rescale so that c has exactly prec base 100 'digits' - shift = prec-l - if shift >= 0: - c *= 100**shift - exact = True - else: - c, remainder = divmod(c, 100**-shift) - exact = not remainder - e -= shift - - # find n = floor(sqrt(c)) using Newton's method - n = 10**prec - while True: - q = c//n - if n <= q: - break - else: - n = n + q >> 1 - exact = exact and n*n == c - - if exact: - # result is exact; rescale to use ideal exponent e - if shift >= 0: - # assert n % 10**shift == 0 - n //= 10**shift - else: - n *= 10**-shift - e += shift - else: - # result is not exact; fix last digit as described above - if n % 5 == 0: - n += 1 - - ans = _dec_from_triple(0, str(n), e) - - # round, and fit to current context - context = context._shallow_copy() - rounding = context._set_rounding(ROUND_HALF_EVEN) - ans = ans._fix(context) - context.rounding = rounding - - return ans - - def max(self, other, context=None): - """Returns the larger value. - - Like max(self, other) except if one is not a number, returns - NaN (and signals if one is sNaN). Also rounds. 
- """ - other = _convert_other(other, raiseit=True) - - if context is None: - context = getcontext() - - if self._is_special or other._is_special: - # If one operand is a quiet NaN and the other is number, then the - # number is always returned - sn = self._isnan() - on = other._isnan() - if sn or on: - if on == 1 and sn == 0: - return self._fix(context) - if sn == 1 and on == 0: - return other._fix(context) - return self._check_nans(other, context) - - c = self._cmp(other) - if c == 0: - # If both operands are finite and equal in numerical value - # then an ordering is applied: - # - # If the signs differ then max returns the operand with the - # positive sign and min returns the operand with the negative sign - # - # If the signs are the same then the exponent is used to select - # the result. This is exactly the ordering used in compare_total. - c = self.compare_total(other) - - if c == -1: - ans = other - else: - ans = self - - return ans._fix(context) - - def min(self, other, context=None): - """Returns the smaller value. - - Like min(self, other) except if one is not a number, returns - NaN (and signals if one is sNaN). Also rounds. 
- """ - other = _convert_other(other, raiseit=True) - - if context is None: - context = getcontext() - - if self._is_special or other._is_special: - # If one operand is a quiet NaN and the other is number, then the - # number is always returned - sn = self._isnan() - on = other._isnan() - if sn or on: - if on == 1 and sn == 0: - return self._fix(context) - if sn == 1 and on == 0: - return other._fix(context) - return self._check_nans(other, context) - - c = self._cmp(other) - if c == 0: - c = self.compare_total(other) - - if c == -1: - ans = self - else: - ans = other - - return ans._fix(context) - - def _isinteger(self): - """Returns whether self is an integer""" - if self._is_special: - return False - if self._exp >= 0: - return True - rest = self._int[self._exp:] - return rest == '0'*len(rest) - - def _iseven(self): - """Returns True if self is even. Assumes self is an integer.""" - if not self or self._exp > 0: - return True - return self._int[-1+self._exp] in '02468' - - def adjusted(self): - """Return the adjusted exponent of self""" - try: - return self._exp + len(self._int) - 1 - # If NaN or Infinity, self._exp is string - except TypeError: - return 0 - - def canonical(self, context=None): - """Returns the same Decimal object. - - As we do not have different encodings for the same number, the - received object already is in its canonical form. - """ - return self - - def compare_signal(self, other, context=None): - """Compares self to the other operand numerically. - - It's pretty much like compare(), but all NaNs signal, with signaling - NaNs taking precedence over quiet NaNs. - """ - other = _convert_other(other, raiseit = True) - ans = self._compare_check_nans(other, context) - if ans: - return ans - return self.compare(other, context=context) - - def compare_total(self, other): - """Compares self to other using the abstract representations. - - This is not like the standard compare, which use their numerical - value. 
Note that a total ordering is defined for all possible abstract - representations. - """ - other = _convert_other(other, raiseit=True) - - # if one is negative and the other is positive, it's easy - if self._sign and not other._sign: - return _NegativeOne - if not self._sign and other._sign: - return _One - sign = self._sign - - # let's handle both NaN types - self_nan = self._isnan() - other_nan = other._isnan() - if self_nan or other_nan: - if self_nan == other_nan: - # compare payloads as though they're integers - self_key = len(self._int), self._int - other_key = len(other._int), other._int - if self_key < other_key: - if sign: - return _One - else: - return _NegativeOne - if self_key > other_key: - if sign: - return _NegativeOne - else: - return _One - return _Zero - - if sign: - if self_nan == 1: - return _NegativeOne - if other_nan == 1: - return _One - if self_nan == 2: - return _NegativeOne - if other_nan == 2: - return _One - else: - if self_nan == 1: - return _One - if other_nan == 1: - return _NegativeOne - if self_nan == 2: - return _One - if other_nan == 2: - return _NegativeOne - - if self < other: - return _NegativeOne - if self > other: - return _One - - if self._exp < other._exp: - if sign: - return _One - else: - return _NegativeOne - if self._exp > other._exp: - if sign: - return _NegativeOne - else: - return _One - return _Zero - - - def compare_total_mag(self, other): - """Compares self to other using abstract repr., ignoring sign. - - Like compare_total, but with operand's sign ignored and assumed to be 0. - """ - other = _convert_other(other, raiseit=True) - - s = self.copy_abs() - o = other.copy_abs() - return s.compare_total(o) - - def copy_abs(self): - """Returns a copy with the sign set to 0. 
""" - return _dec_from_triple(0, self._int, self._exp, self._is_special) - - def copy_negate(self): - """Returns a copy with the sign inverted.""" - if self._sign: - return _dec_from_triple(0, self._int, self._exp, self._is_special) - else: - return _dec_from_triple(1, self._int, self._exp, self._is_special) - - def copy_sign(self, other): - """Returns self with the sign of other.""" - other = _convert_other(other, raiseit=True) - return _dec_from_triple(other._sign, self._int, - self._exp, self._is_special) - - def exp(self, context=None): - """Returns e ** self.""" - - if context is None: - context = getcontext() - - # exp(NaN) = NaN - ans = self._check_nans(context=context) - if ans: - return ans - - # exp(-Infinity) = 0 - if self._isinfinity() == -1: - return _Zero - - # exp(0) = 1 - if not self: - return _One - - # exp(Infinity) = Infinity - if self._isinfinity() == 1: - return Decimal(self) - - # the result is now guaranteed to be inexact (the true - # mathematical result is transcendental). There's no need to - # raise Rounded and Inexact here---they'll always be raised as - # a result of the call to _fix. - p = context.prec - adj = self.adjusted() - - # we only need to do any computation for quite a small range - # of adjusted exponents---for example, -29 <= adj <= 10 for - # the default context. For smaller exponent the result is - # indistinguishable from 1 at the given precision, while for - # larger exponent the result either overflows or underflows. 
- if self._sign == 0 and adj > len(str((context.Emax+1)*3)): - # overflow - ans = _dec_from_triple(0, '1', context.Emax+1) - elif self._sign == 1 and adj > len(str((-context.Etiny()+1)*3)): - # underflow to 0 - ans = _dec_from_triple(0, '1', context.Etiny()-1) - elif self._sign == 0 and adj < -p: - # p+1 digits; final round will raise correct flags - ans = _dec_from_triple(0, '1' + '0'*(p-1) + '1', -p) - elif self._sign == 1 and adj < -p-1: - # p+1 digits; final round will raise correct flags - ans = _dec_from_triple(0, '9'*(p+1), -p-1) - # general case - else: - op = _WorkRep(self) - c, e = op.int, op.exp - if op.sign == 1: - c = -c - - # compute correctly rounded result: increase precision by - # 3 digits at a time until we get an unambiguously - # roundable result - extra = 3 - while True: - coeff, exp = _dexp(c, e, p+extra) - if coeff % (5*10**(len(str(coeff))-p-1)): - break - extra += 3 - - ans = _dec_from_triple(0, str(coeff), exp) - - # at this stage, ans should round correctly with *any* - # rounding mode, not just with ROUND_HALF_EVEN - context = context._shallow_copy() - rounding = context._set_rounding(ROUND_HALF_EVEN) - ans = ans._fix(context) - context.rounding = rounding - - return ans - - def is_canonical(self): - """Return True if self is canonical; otherwise return False. - - Currently, the encoding of a Decimal instance is always - canonical, so this method returns True for any Decimal. - """ - return True - - def is_finite(self): - """Return True if self is finite; otherwise return False. - - A Decimal instance is considered finite if it is neither - infinite nor a NaN. 
- """ - return not self._is_special - - def is_infinite(self): - """Return True if self is infinite; otherwise return False.""" - return self._exp == 'F' - - def is_nan(self): - """Return True if self is a qNaN or sNaN; otherwise return False.""" - return self._exp in ('n', 'N') - - def is_normal(self, context=None): - """Return True if self is a normal number; otherwise return False.""" - if self._is_special or not self: - return False - if context is None: - context = getcontext() - return context.Emin <= self.adjusted() - - def is_qnan(self): - """Return True if self is a quiet NaN; otherwise return False.""" - return self._exp == 'n' - - def is_signed(self): - """Return True if self is negative; otherwise return False.""" - return self._sign == 1 - - def is_snan(self): - """Return True if self is a signaling NaN; otherwise return False.""" - return self._exp == 'N' - - def is_subnormal(self, context=None): - """Return True if self is subnormal; otherwise return False.""" - if self._is_special or not self: - return False - if context is None: - context = getcontext() - return self.adjusted() < context.Emin - - def is_zero(self): - """Return True if self is a zero; otherwise return False.""" - return not self._is_special and self._int == '0' - - def _ln_exp_bound(self): - """Compute a lower bound for the adjusted exponent of self.ln(). - In other words, compute r such that self.ln() >= 10**r. Assumes - that self is finite and positive and that self != 1. 
- """ - - # for 0.1 <= x <= 10 we use the inequalities 1-1/x <= ln(x) <= x-1 - adj = self._exp + len(self._int) - 1 - if adj >= 1: - # argument >= 10; we use 23/10 = 2.3 as a lower bound for ln(10) - return len(str(adj*23//10)) - 1 - if adj <= -2: - # argument <= 0.1 - return len(str((-1-adj)*23//10)) - 1 - op = _WorkRep(self) - c, e = op.int, op.exp - if adj == 0: - # 1 < self < 10 - num = str(c-10**-e) - den = str(c) - return len(num) - len(den) - (num < den) - # adj == -1, 0.1 <= self < 1 - return e + len(str(10**-e - c)) - 1 - - - def ln(self, context=None): - """Returns the natural (base e) logarithm of self.""" - - if context is None: - context = getcontext() - - # ln(NaN) = NaN - ans = self._check_nans(context=context) - if ans: - return ans - - # ln(0.0) == -Infinity - if not self: - return _NegativeInfinity - - # ln(Infinity) = Infinity - if self._isinfinity() == 1: - return _Infinity - - # ln(1.0) == 0.0 - if self == _One: - return _Zero - - # ln(negative) raises InvalidOperation - if self._sign == 1: - return context._raise_error(InvalidOperation, - 'ln of a negative value') - - # result is irrational, so necessarily inexact - op = _WorkRep(self) - c, e = op.int, op.exp - p = context.prec - - # correctly rounded result: repeatedly increase precision by 3 - # until we get an unambiguously roundable result - places = p - self._ln_exp_bound() + 2 # at least p+3 places - while True: - coeff = _dlog(c, e, places) - # assert len(str(abs(coeff)))-p >= 1 - if coeff % (5*10**(len(str(abs(coeff)))-p-1)): - break - places += 3 - ans = _dec_from_triple(int(coeff<0), str(abs(coeff)), -places) - - context = context._shallow_copy() - rounding = context._set_rounding(ROUND_HALF_EVEN) - ans = ans._fix(context) - context.rounding = rounding - return ans - - def _log10_exp_bound(self): - """Compute a lower bound for the adjusted exponent of self.log10(). - In other words, find r such that self.log10() >= 10**r. - Assumes that self is finite and positive and that self != 1. 
- """ - - # For x >= 10 or x < 0.1 we only need a bound on the integer - # part of log10(self), and this comes directly from the - # exponent of x. For 0.1 <= x <= 10 we use the inequalities - # 1-1/x <= log(x) <= x-1. If x > 1 we have |log10(x)| > - # (1-1/x)/2.31 > 0. If x < 1 then |log10(x)| > (1-x)/2.31 > 0 - - adj = self._exp + len(self._int) - 1 - if adj >= 1: - # self >= 10 - return len(str(adj))-1 - if adj <= -2: - # self < 0.1 - return len(str(-1-adj))-1 - op = _WorkRep(self) - c, e = op.int, op.exp - if adj == 0: - # 1 < self < 10 - num = str(c-10**-e) - den = str(231*c) - return len(num) - len(den) - (num < den) + 2 - # adj == -1, 0.1 <= self < 1 - num = str(10**-e-c) - return len(num) + e - (num < "231") - 1 - - def log10(self, context=None): - """Returns the base 10 logarithm of self.""" - - if context is None: - context = getcontext() - - # log10(NaN) = NaN - ans = self._check_nans(context=context) - if ans: - return ans - - # log10(0.0) == -Infinity - if not self: - return _NegativeInfinity - - # log10(Infinity) = Infinity - if self._isinfinity() == 1: - return _Infinity - - # log10(negative or -Infinity) raises InvalidOperation - if self._sign == 1: - return context._raise_error(InvalidOperation, - 'log10 of a negative value') - - # log10(10**n) = n - if self._int[0] == '1' and self._int[1:] == '0'*(len(self._int) - 1): - # answer may need rounding - ans = Decimal(self._exp + len(self._int) - 1) - else: - # result is irrational, so necessarily inexact - op = _WorkRep(self) - c, e = op.int, op.exp - p = context.prec - - # correctly rounded result: repeatedly increase precision - # until result is unambiguously roundable - places = p-self._log10_exp_bound()+2 - while True: - coeff = _dlog10(c, e, places) - # assert len(str(abs(coeff)))-p >= 1 - if coeff % (5*10**(len(str(abs(coeff)))-p-1)): - break - places += 3 - ans = _dec_from_triple(int(coeff<0), str(abs(coeff)), -places) - - context = context._shallow_copy() - rounding = 
context._set_rounding(ROUND_HALF_EVEN) - ans = ans._fix(context) - context.rounding = rounding - return ans - - def logb(self, context=None): - """ Returns the exponent of the magnitude of self's MSD. - - The result is the integer which is the exponent of the magnitude - of the most significant digit of self (as though it were truncated - to a single digit while maintaining the value of that digit and - without limiting the resulting exponent). - """ - # logb(NaN) = NaN - ans = self._check_nans(context=context) - if ans: - return ans - - if context is None: - context = getcontext() - - # logb(+/-Inf) = +Inf - if self._isinfinity(): - return _Infinity - - # logb(0) = -Inf, DivisionByZero - if not self: - return context._raise_error(DivisionByZero, 'logb(0)', 1) - - # otherwise, simply return the adjusted exponent of self, as a - # Decimal. Note that no attempt is made to fit the result - # into the current context. - ans = Decimal(self.adjusted()) - return ans._fix(context) - - def _islogical(self): - """Return True if self is a logical operand. - - For being logical, it must be a finite number with a sign of 0, - an exponent of 0, and a coefficient whose digits must all be - either 0 or 1. 
- """ - if self._sign != 0 or self._exp != 0: - return False - for dig in self._int: - if dig not in '01': - return False - return True - - def _fill_logical(self, context, opa, opb): - dif = context.prec - len(opa) - if dif > 0: - opa = '0'*dif + opa - elif dif < 0: - opa = opa[-context.prec:] - dif = context.prec - len(opb) - if dif > 0: - opb = '0'*dif + opb - elif dif < 0: - opb = opb[-context.prec:] - return opa, opb - - def logical_and(self, other, context=None): - """Applies an 'and' operation between self and other's digits.""" - if context is None: - context = getcontext() - - other = _convert_other(other, raiseit=True) - - if not self._islogical() or not other._islogical(): - return context._raise_error(InvalidOperation) - - # fill to context.prec - (opa, opb) = self._fill_logical(context, self._int, other._int) - - # make the operation, and clean starting zeroes - result = "".join([str(int(a)&int(b)) for a,b in zip(opa,opb)]) - return _dec_from_triple(0, result.lstrip('0') or '0', 0) - - def logical_invert(self, context=None): - """Invert all its digits.""" - if context is None: - context = getcontext() - return self.logical_xor(_dec_from_triple(0,'1'*context.prec,0), - context) - - def logical_or(self, other, context=None): - """Applies an 'or' operation between self and other's digits.""" - if context is None: - context = getcontext() - - other = _convert_other(other, raiseit=True) - - if not self._islogical() or not other._islogical(): - return context._raise_error(InvalidOperation) - - # fill to context.prec - (opa, opb) = self._fill_logical(context, self._int, other._int) - - # make the operation, and clean starting zeroes - result = "".join([str(int(a)|int(b)) for a,b in zip(opa,opb)]) - return _dec_from_triple(0, result.lstrip('0') or '0', 0) - - def logical_xor(self, other, context=None): - """Applies an 'xor' operation between self and other's digits.""" - if context is None: - context = getcontext() - - other = _convert_other(other, 
raiseit=True) - - if not self._islogical() or not other._islogical(): - return context._raise_error(InvalidOperation) - - # fill to context.prec - (opa, opb) = self._fill_logical(context, self._int, other._int) - - # make the operation, and clean starting zeroes - result = "".join([str(int(a)^int(b)) for a,b in zip(opa,opb)]) - return _dec_from_triple(0, result.lstrip('0') or '0', 0) - - def max_mag(self, other, context=None): - """Compares the values numerically with their sign ignored.""" - other = _convert_other(other, raiseit=True) - - if context is None: - context = getcontext() - - if self._is_special or other._is_special: - # If one operand is a quiet NaN and the other is number, then the - # number is always returned - sn = self._isnan() - on = other._isnan() - if sn or on: - if on == 1 and sn == 0: - return self._fix(context) - if sn == 1 and on == 0: - return other._fix(context) - return self._check_nans(other, context) - - c = self.copy_abs()._cmp(other.copy_abs()) - if c == 0: - c = self.compare_total(other) - - if c == -1: - ans = other - else: - ans = self - - return ans._fix(context) - - def min_mag(self, other, context=None): - """Compares the values numerically with their sign ignored.""" - other = _convert_other(other, raiseit=True) - - if context is None: - context = getcontext() - - if self._is_special or other._is_special: - # If one operand is a quiet NaN and the other is number, then the - # number is always returned - sn = self._isnan() - on = other._isnan() - if sn or on: - if on == 1 and sn == 0: - return self._fix(context) - if sn == 1 and on == 0: - return other._fix(context) - return self._check_nans(other, context) - - c = self.copy_abs()._cmp(other.copy_abs()) - if c == 0: - c = self.compare_total(other) - - if c == -1: - ans = self - else: - ans = other - - return ans._fix(context) - - def next_minus(self, context=None): - """Returns the largest representable number smaller than itself.""" - if context is None: - context = 
getcontext() - - ans = self._check_nans(context=context) - if ans: - return ans - - if self._isinfinity() == -1: - return _NegativeInfinity - if self._isinfinity() == 1: - return _dec_from_triple(0, '9'*context.prec, context.Etop()) - - context = context.copy() - context._set_rounding(ROUND_FLOOR) - context._ignore_all_flags() - new_self = self._fix(context) - if new_self != self: - return new_self - return self.__sub__(_dec_from_triple(0, '1', context.Etiny()-1), - context) - - def next_plus(self, context=None): - """Returns the smallest representable number larger than itself.""" - if context is None: - context = getcontext() - - ans = self._check_nans(context=context) - if ans: - return ans - - if self._isinfinity() == 1: - return _Infinity - if self._isinfinity() == -1: - return _dec_from_triple(1, '9'*context.prec, context.Etop()) - - context = context.copy() - context._set_rounding(ROUND_CEILING) - context._ignore_all_flags() - new_self = self._fix(context) - if new_self != self: - return new_self - return self.__add__(_dec_from_triple(0, '1', context.Etiny()-1), - context) - - def next_toward(self, other, context=None): - """Returns the number closest to self, in the direction towards other. - - The result is the closest representable number to self - (excluding self) that is in the direction towards other, - unless both have the same value. If the two operands are - numerically equal, then the result is a copy of self with the - sign set to be the same as the sign of other. 
- """ - other = _convert_other(other, raiseit=True) - - if context is None: - context = getcontext() - - ans = self._check_nans(other, context) - if ans: - return ans - - comparison = self._cmp(other) - if comparison == 0: - return self.copy_sign(other) - - if comparison == -1: - ans = self.next_plus(context) - else: # comparison == 1 - ans = self.next_minus(context) - - # decide which flags to raise using value of ans - if ans._isinfinity(): - context._raise_error(Overflow, - 'Infinite result from next_toward', - ans._sign) - context._raise_error(Inexact) - context._raise_error(Rounded) - elif ans.adjusted() < context.Emin: - context._raise_error(Underflow) - context._raise_error(Subnormal) - context._raise_error(Inexact) - context._raise_error(Rounded) - # if precision == 1 then we don't raise Clamped for a - # result 0E-Etiny. - if not ans: - context._raise_error(Clamped) - - return ans - - def number_class(self, context=None): - """Returns an indication of the class of self. - - The class is one of the following strings: - sNaN - NaN - -Infinity - -Normal - -Subnormal - -Zero - +Zero - +Subnormal - +Normal - +Infinity - """ - if self.is_snan(): - return "sNaN" - if self.is_qnan(): - return "NaN" - inf = self._isinfinity() - if inf == 1: - return "+Infinity" - if inf == -1: - return "-Infinity" - if self.is_zero(): - if self._sign: - return "-Zero" - else: - return "+Zero" - if context is None: - context = getcontext() - if self.is_subnormal(context=context): - if self._sign: - return "-Subnormal" - else: - return "+Subnormal" - # just a normal, regular, boring number, :) - if self._sign: - return "-Normal" - else: - return "+Normal" - - def radix(self): - """Just returns 10, as this is Decimal, :)""" - return Decimal(10) - - def rotate(self, other, context=None): - """Returns a rotated copy of self, value-of-other times.""" - if context is None: - context = getcontext() - - other = _convert_other(other, raiseit=True) - - ans = self._check_nans(other, context) - 
if ans: - return ans - - if other._exp != 0: - return context._raise_error(InvalidOperation) - if not (-context.prec <= int(other) <= context.prec): - return context._raise_error(InvalidOperation) - - if self._isinfinity(): - return Decimal(self) - - # get values, pad if necessary - torot = int(other) - rotdig = self._int - topad = context.prec - len(rotdig) - if topad > 0: - rotdig = '0'*topad + rotdig - elif topad < 0: - rotdig = rotdig[-topad:] - - # let's rotate! - rotated = rotdig[torot:] + rotdig[:torot] - return _dec_from_triple(self._sign, - rotated.lstrip('0') or '0', self._exp) - - def scaleb(self, other, context=None): - """Returns self operand after adding the second value to its exp.""" - if context is None: - context = getcontext() - - other = _convert_other(other, raiseit=True) - - ans = self._check_nans(other, context) - if ans: - return ans - - if other._exp != 0: - return context._raise_error(InvalidOperation) - liminf = -2 * (context.Emax + context.prec) - limsup = 2 * (context.Emax + context.prec) - if not (liminf <= int(other) <= limsup): - return context._raise_error(InvalidOperation) - - if self._isinfinity(): - return Decimal(self) - - d = _dec_from_triple(self._sign, self._int, self._exp + int(other)) - d = d._fix(context) - return d - - def shift(self, other, context=None): - """Returns a shifted copy of self, value-of-other times.""" - if context is None: - context = getcontext() - - other = _convert_other(other, raiseit=True) - - ans = self._check_nans(other, context) - if ans: - return ans - - if other._exp != 0: - return context._raise_error(InvalidOperation) - if not (-context.prec <= int(other) <= context.prec): - return context._raise_error(InvalidOperation) - - if self._isinfinity(): - return Decimal(self) - - # get values, pad if necessary - torot = int(other) - rotdig = self._int - topad = context.prec - len(rotdig) - if topad > 0: - rotdig = '0'*topad + rotdig - elif topad < 0: - rotdig = rotdig[-topad:] - - # let's shift! 
- if torot < 0: - shifted = rotdig[:torot] - else: - shifted = rotdig + '0'*torot - shifted = shifted[-context.prec:] - - return _dec_from_triple(self._sign, - shifted.lstrip('0') or '0', self._exp) - - # Support for pickling, copy, and deepcopy - def __reduce__(self): - return (self.__class__, (str(self),)) - - def __copy__(self): - if type(self) is Decimal: - return self # I'm immutable; therefore I am my own clone - return self.__class__(str(self)) - - def __deepcopy__(self, memo): - if type(self) is Decimal: - return self # My components are also immutable - return self.__class__(str(self)) - - # PEP 3101 support. the _localeconv keyword argument should be - # considered private: it's provided for ease of testing only. - def __format__(self, specifier, context=None, _localeconv=None): - """Format a Decimal instance according to the given specifier. - - The specifier should be a standard format specifier, with the - form described in PEP 3101. Formatting types 'e', 'E', 'f', - 'F', 'g', 'G', 'n' and '%' are supported. If the formatting - type is omitted it defaults to 'g' or 'G', depending on the - value of context.capitals. - """ - - # Note: PEP 3101 says that if the type is not present then - # there should be at least one digit after the decimal point. - # We take the liberty of ignoring this requirement for - # Decimal---it's presumably there to make sure that - # format(float, '') behaves similarly to str(float). 
- if context is None: - context = getcontext() - - spec = _parse_format_specifier(specifier, _localeconv=_localeconv) - - # special values don't care about the type or precision - if self._is_special: - sign = _format_sign(self._sign, spec) - body = str(self.copy_abs()) - if spec['type'] == '%': - body += '%' - return _format_align(sign, body, spec) - - # a type of None defaults to 'g' or 'G', depending on context - if spec['type'] is None: - spec['type'] = ['g', 'G'][context.capitals] - - # if type is '%', adjust exponent of self accordingly - if spec['type'] == '%': - self = _dec_from_triple(self._sign, self._int, self._exp+2) - - # round if necessary, taking rounding mode from the context - rounding = context.rounding - precision = spec['precision'] - if precision is not None: - if spec['type'] in 'eE': - self = self._round(precision+1, rounding) - elif spec['type'] in 'fF%': - self = self._rescale(-precision, rounding) - elif spec['type'] in 'gG' and len(self._int) > precision: - self = self._round(precision, rounding) - # special case: zeros with a positive exponent can't be - # represented in fixed point; rescale them to 0e0. 
- if not self and self._exp > 0 and spec['type'] in 'fF%': - self = self._rescale(0, rounding) - - # figure out placement of the decimal point - leftdigits = self._exp + len(self._int) - if spec['type'] in 'eE': - if not self and precision is not None: - dotplace = 1 - precision - else: - dotplace = 1 - elif spec['type'] in 'fF%': - dotplace = leftdigits - elif spec['type'] in 'gG': - if self._exp <= 0 and leftdigits > -6: - dotplace = leftdigits - else: - dotplace = 1 - - # find digits before and after decimal point, and get exponent - if dotplace < 0: - intpart = '0' - fracpart = '0'*(-dotplace) + self._int - elif dotplace > len(self._int): - intpart = self._int + '0'*(dotplace-len(self._int)) - fracpart = '' - else: - intpart = self._int[:dotplace] or '0' - fracpart = self._int[dotplace:] - exp = leftdigits-dotplace - - # done with the decimal-specific stuff; hand over the rest - # of the formatting to the _format_number function - return _format_number(self._sign, intpart, fracpart, exp, spec) - -def _dec_from_triple(sign, coefficient, exponent, special=False): - """Create a decimal instance directly, without any validation, - normalization (e.g. removal of leading zeros) or argument - conversion. - - This function is for *internal use only*. - """ - - self = object.__new__(Decimal) - self._sign = sign - self._int = coefficient - self._exp = exponent - self._is_special = special - - return self - -# Register Decimal as a kind of Number (an abstract base class). -# However, do not register it as Real (because Decimals are not -# interoperable with floats). -_numbers.Number.register(Decimal) - - -##### Context class ####################################################### - -class _ContextManager(object): - """Context manager class to support localcontext(). 
- - Sets a copy of the supplied context in __enter__() and restores - the previous decimal context in __exit__() - """ - def __init__(self, new_context): - self.new_context = new_context.copy() - def __enter__(self): - self.saved_context = getcontext() - setcontext(self.new_context) - return self.new_context - def __exit__(self, t, v, tb): - setcontext(self.saved_context) - -class Context(object): - """Contains the context for a Decimal instance. - - Contains: - prec - precision (for use in rounding, division, square roots..) - rounding - rounding type (how you round) - traps - If traps[exception] = 1, then the exception is - raised when it is caused. Otherwise, a value is - substituted in. - flags - When an exception is caused, flags[exception] is set. - (Whether or not the trap_enabler is set) - Should be reset by user of Decimal instance. - Emin - Minimum exponent - Emax - Maximum exponent - capitals - If 1, 1*10^1 is printed as 1E+1. - If 0, printed as 1e1 - _clamp - If 1, change exponents if too high (Default 0) - """ - - def __init__(self, prec=None, rounding=None, - traps=None, flags=None, - Emin=None, Emax=None, - capitals=None, _clamp=0, - _ignored_flags=None): - # Set defaults; for everything except flags and _ignored_flags, - # inherit from DefaultContext. 
- try: - dc = DefaultContext - except NameError: - pass - - self.prec = prec if prec is not None else dc.prec - self.rounding = rounding if rounding is not None else dc.rounding - self.Emin = Emin if Emin is not None else dc.Emin - self.Emax = Emax if Emax is not None else dc.Emax - self.capitals = capitals if capitals is not None else dc.capitals - self._clamp = _clamp if _clamp is not None else dc._clamp - - if _ignored_flags is None: - self._ignored_flags = [] - else: - self._ignored_flags = _ignored_flags - - if traps is None: - self.traps = dc.traps.copy() - elif not isinstance(traps, dict): - self.traps = dict((s, int(s in traps)) for s in _signals) - else: - self.traps = traps - - if flags is None: - self.flags = dict.fromkeys(_signals, 0) - elif not isinstance(flags, dict): - self.flags = dict((s, int(s in flags)) for s in _signals) - else: - self.flags = flags - - def __repr__(self): - """Show the current context.""" - s = [] - s.append('Context(prec=%(prec)d, rounding=%(rounding)s, ' - 'Emin=%(Emin)d, Emax=%(Emax)d, capitals=%(capitals)d' - % vars(self)) - names = [f.__name__ for f, v in self.flags.items() if v] - s.append('flags=[' + ', '.join(names) + ']') - names = [t.__name__ for t, v in self.traps.items() if v] - s.append('traps=[' + ', '.join(names) + ']') - return ', '.join(s) + ')' - - def clear_flags(self): - """Reset all flags to zero""" - for flag in self.flags: - self.flags[flag] = 0 - - def _shallow_copy(self): - """Returns a shallow copy from self.""" - nc = Context(self.prec, self.rounding, self.traps, - self.flags, self.Emin, self.Emax, - self.capitals, self._clamp, self._ignored_flags) - return nc - - def copy(self): - """Returns a deep copy from self.""" - nc = Context(self.prec, self.rounding, self.traps.copy(), - self.flags.copy(), self.Emin, self.Emax, - self.capitals, self._clamp, self._ignored_flags) - return nc - __copy__ = copy - - def _raise_error(self, condition, explanation = None, *args): - """Handles an error - - If the flag 
is in _ignored_flags, returns the default response. - Otherwise, it sets the flag, then, if the corresponding - trap_enabler is set, it reraises the exception. Otherwise, it returns - the default value after setting the flag. - """ - error = _condition_map.get(condition, condition) - if error in self._ignored_flags: - # Don't touch the flag - return error().handle(self, *args) - - self.flags[error] = 1 - if not self.traps[error]: - # The errors define how to handle themselves. - return condition().handle(self, *args) - - # Errors should only be risked on copies of the context - # self._ignored_flags = [] - raise error(explanation) - - def _ignore_all_flags(self): - """Ignore all flags, if they are raised""" - return self._ignore_flags(*_signals) - - def _ignore_flags(self, *flags): - """Ignore the flags, if they are raised""" - # Do not mutate-- This way, copies of a context leave the original - # alone. - self._ignored_flags = (self._ignored_flags + list(flags)) - return list(flags) - - def _regard_flags(self, *flags): - """Stop ignoring the flags, if they are raised""" - if flags and isinstance(flags[0], (tuple,list)): - flags = flags[0] - for flag in flags: - self._ignored_flags.remove(flag) - - # We inherit object.__hash__, so we must deny this explicitly - __hash__ = None - - def Etiny(self): - """Returns Etiny (= Emin - prec + 1)""" - return int(self.Emin - self.prec + 1) - - def Etop(self): - """Returns maximum exponent (= Emax - prec + 1)""" - return int(self.Emax - self.prec + 1) - - def _set_rounding(self, type): - """Sets the rounding type. - - Sets the rounding type, and returns the current (previous) - rounding type. Often used like: - - context = context.copy() - # so you don't change the calling context - # if an error occurs in the middle. - rounding = context._set_rounding(ROUND_UP) - val = self.__sub__(other, context=context) - context._set_rounding(rounding) - - This will make it round up for that operation. 
- """ - rounding = self.rounding - self.rounding= type - return rounding - - def create_decimal(self, num='0'): - """Creates a new Decimal instance but using self as context. - - This method implements the to-number operation of the - IBM Decimal specification.""" - - if isinstance(num, basestring) and num != num.strip(): - return self._raise_error(ConversionSyntax, - "no trailing or leading whitespace is " - "permitted.") - - d = Decimal(num, context=self) - if d._isnan() and len(d._int) > self.prec - self._clamp: - return self._raise_error(ConversionSyntax, - "diagnostic info too long in NaN") - return d._fix(self) - - def create_decimal_from_float(self, f): - """Creates a new Decimal instance from a float but rounding using self - as the context. - - >>> context = Context(prec=5, rounding=ROUND_DOWN) - >>> context.create_decimal_from_float(3.1415926535897932) - Decimal('3.1415') - >>> context = Context(prec=5, traps=[Inexact]) - >>> context.create_decimal_from_float(3.1415926535897932) - Traceback (most recent call last): - ... - Inexact: None - - """ - d = Decimal.from_float(f) # An exact conversion - return d._fix(self) # Apply the context rounding - - # Methods - def abs(self, a): - """Returns the absolute value of the operand. - - If the operand is negative, the result is the same as using the minus - operation on the operand. Otherwise, the result is the same as using - the plus operation on the operand. - - >>> ExtendedContext.abs(Decimal('2.1')) - Decimal('2.1') - >>> ExtendedContext.abs(Decimal('-100')) - Decimal('100') - >>> ExtendedContext.abs(Decimal('101.5')) - Decimal('101.5') - >>> ExtendedContext.abs(Decimal('-101.5')) - Decimal('101.5') - >>> ExtendedContext.abs(-1) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - return a.__abs__(context=self) - - def add(self, a, b): - """Return the sum of the two operands. 
- - >>> ExtendedContext.add(Decimal('12'), Decimal('7.00')) - Decimal('19.00') - >>> ExtendedContext.add(Decimal('1E+2'), Decimal('1.01E+4')) - Decimal('1.02E+4') - >>> ExtendedContext.add(1, Decimal(2)) - Decimal('3') - >>> ExtendedContext.add(Decimal(8), 5) - Decimal('13') - >>> ExtendedContext.add(5, 5) - Decimal('10') - """ - a = _convert_other(a, raiseit=True) - r = a.__add__(b, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def _apply(self, a): - return str(a._fix(self)) - - def canonical(self, a): - """Returns the same Decimal object. - - As we do not have different encodings for the same number, the - received object already is in its canonical form. - - >>> ExtendedContext.canonical(Decimal('2.50')) - Decimal('2.50') - """ - return a.canonical(context=self) - - def compare(self, a, b): - """Compares values numerically. - - If the signs of the operands differ, a value representing each operand - ('-1' if the operand is less than zero, '0' if the operand is zero or - negative zero, or '1' if the operand is greater than zero) is used in - place of that operand for the comparison instead of the actual - operand. - - The comparison is then effected by subtracting the second operand from - the first and then returning a value according to the result of the - subtraction: '-1' if the result is less than zero, '0' if the result is - zero or negative zero, or '1' if the result is greater than zero. 
- - >>> ExtendedContext.compare(Decimal('2.1'), Decimal('3')) - Decimal('-1') - >>> ExtendedContext.compare(Decimal('2.1'), Decimal('2.1')) - Decimal('0') - >>> ExtendedContext.compare(Decimal('2.1'), Decimal('2.10')) - Decimal('0') - >>> ExtendedContext.compare(Decimal('3'), Decimal('2.1')) - Decimal('1') - >>> ExtendedContext.compare(Decimal('2.1'), Decimal('-3')) - Decimal('1') - >>> ExtendedContext.compare(Decimal('-3'), Decimal('2.1')) - Decimal('-1') - >>> ExtendedContext.compare(1, 2) - Decimal('-1') - >>> ExtendedContext.compare(Decimal(1), 2) - Decimal('-1') - >>> ExtendedContext.compare(1, Decimal(2)) - Decimal('-1') - """ - a = _convert_other(a, raiseit=True) - return a.compare(b, context=self) - - def compare_signal(self, a, b): - """Compares the values of the two operands numerically. - - It's pretty much like compare(), but all NaNs signal, with signaling - NaNs taking precedence over quiet NaNs. - - >>> c = ExtendedContext - >>> c.compare_signal(Decimal('2.1'), Decimal('3')) - Decimal('-1') - >>> c.compare_signal(Decimal('2.1'), Decimal('2.1')) - Decimal('0') - >>> c.flags[InvalidOperation] = 0 - >>> print c.flags[InvalidOperation] - 0 - >>> c.compare_signal(Decimal('NaN'), Decimal('2.1')) - Decimal('NaN') - >>> print c.flags[InvalidOperation] - 1 - >>> c.flags[InvalidOperation] = 0 - >>> print c.flags[InvalidOperation] - 0 - >>> c.compare_signal(Decimal('sNaN'), Decimal('2.1')) - Decimal('NaN') - >>> print c.flags[InvalidOperation] - 1 - >>> c.compare_signal(-1, 2) - Decimal('-1') - >>> c.compare_signal(Decimal(-1), 2) - Decimal('-1') - >>> c.compare_signal(-1, Decimal(2)) - Decimal('-1') - """ - a = _convert_other(a, raiseit=True) - return a.compare_signal(b, context=self) - - def compare_total(self, a, b): - """Compares two operands using their abstract representation. - - This is not like the standard compare, which use their numerical - value. Note that a total ordering is defined for all possible abstract - representations. 
- - >>> ExtendedContext.compare_total(Decimal('12.73'), Decimal('127.9')) - Decimal('-1') - >>> ExtendedContext.compare_total(Decimal('-127'), Decimal('12')) - Decimal('-1') - >>> ExtendedContext.compare_total(Decimal('12.30'), Decimal('12.3')) - Decimal('-1') - >>> ExtendedContext.compare_total(Decimal('12.30'), Decimal('12.30')) - Decimal('0') - >>> ExtendedContext.compare_total(Decimal('12.3'), Decimal('12.300')) - Decimal('1') - >>> ExtendedContext.compare_total(Decimal('12.3'), Decimal('NaN')) - Decimal('-1') - >>> ExtendedContext.compare_total(1, 2) - Decimal('-1') - >>> ExtendedContext.compare_total(Decimal(1), 2) - Decimal('-1') - >>> ExtendedContext.compare_total(1, Decimal(2)) - Decimal('-1') - """ - a = _convert_other(a, raiseit=True) - return a.compare_total(b) - - def compare_total_mag(self, a, b): - """Compares two operands using their abstract representation ignoring sign. - - Like compare_total, but with operand's sign ignored and assumed to be 0. - """ - a = _convert_other(a, raiseit=True) - return a.compare_total_mag(b) - - def copy_abs(self, a): - """Returns a copy of the operand with the sign set to 0. - - >>> ExtendedContext.copy_abs(Decimal('2.1')) - Decimal('2.1') - >>> ExtendedContext.copy_abs(Decimal('-100')) - Decimal('100') - >>> ExtendedContext.copy_abs(-1) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - return a.copy_abs() - - def copy_decimal(self, a): - """Returns a copy of the decimal object. - - >>> ExtendedContext.copy_decimal(Decimal('2.1')) - Decimal('2.1') - >>> ExtendedContext.copy_decimal(Decimal('-1.00')) - Decimal('-1.00') - >>> ExtendedContext.copy_decimal(1) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - return Decimal(a) - - def copy_negate(self, a): - """Returns a copy of the operand with the sign inverted. 
- - >>> ExtendedContext.copy_negate(Decimal('101.5')) - Decimal('-101.5') - >>> ExtendedContext.copy_negate(Decimal('-101.5')) - Decimal('101.5') - >>> ExtendedContext.copy_negate(1) - Decimal('-1') - """ - a = _convert_other(a, raiseit=True) - return a.copy_negate() - - def copy_sign(self, a, b): - """Copies the second operand's sign to the first one. - - In detail, it returns a copy of the first operand with the sign - equal to the sign of the second operand. - - >>> ExtendedContext.copy_sign(Decimal( '1.50'), Decimal('7.33')) - Decimal('1.50') - >>> ExtendedContext.copy_sign(Decimal('-1.50'), Decimal('7.33')) - Decimal('1.50') - >>> ExtendedContext.copy_sign(Decimal( '1.50'), Decimal('-7.33')) - Decimal('-1.50') - >>> ExtendedContext.copy_sign(Decimal('-1.50'), Decimal('-7.33')) - Decimal('-1.50') - >>> ExtendedContext.copy_sign(1, -2) - Decimal('-1') - >>> ExtendedContext.copy_sign(Decimal(1), -2) - Decimal('-1') - >>> ExtendedContext.copy_sign(1, Decimal(-2)) - Decimal('-1') - """ - a = _convert_other(a, raiseit=True) - return a.copy_sign(b) - - def divide(self, a, b): - """Decimal division in a specified context. 
- - >>> ExtendedContext.divide(Decimal('1'), Decimal('3')) - Decimal('0.333333333') - >>> ExtendedContext.divide(Decimal('2'), Decimal('3')) - Decimal('0.666666667') - >>> ExtendedContext.divide(Decimal('5'), Decimal('2')) - Decimal('2.5') - >>> ExtendedContext.divide(Decimal('1'), Decimal('10')) - Decimal('0.1') - >>> ExtendedContext.divide(Decimal('12'), Decimal('12')) - Decimal('1') - >>> ExtendedContext.divide(Decimal('8.00'), Decimal('2')) - Decimal('4.00') - >>> ExtendedContext.divide(Decimal('2.400'), Decimal('2.0')) - Decimal('1.20') - >>> ExtendedContext.divide(Decimal('1000'), Decimal('100')) - Decimal('10') - >>> ExtendedContext.divide(Decimal('1000'), Decimal('1')) - Decimal('1000') - >>> ExtendedContext.divide(Decimal('2.40E+6'), Decimal('2')) - Decimal('1.20E+6') - >>> ExtendedContext.divide(5, 5) - Decimal('1') - >>> ExtendedContext.divide(Decimal(5), 5) - Decimal('1') - >>> ExtendedContext.divide(5, Decimal(5)) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - r = a.__div__(b, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def divide_int(self, a, b): - """Divides two numbers and returns the integer part of the result. - - >>> ExtendedContext.divide_int(Decimal('2'), Decimal('3')) - Decimal('0') - >>> ExtendedContext.divide_int(Decimal('10'), Decimal('3')) - Decimal('3') - >>> ExtendedContext.divide_int(Decimal('1'), Decimal('0.3')) - Decimal('3') - >>> ExtendedContext.divide_int(10, 3) - Decimal('3') - >>> ExtendedContext.divide_int(Decimal(10), 3) - Decimal('3') - >>> ExtendedContext.divide_int(10, Decimal(3)) - Decimal('3') - """ - a = _convert_other(a, raiseit=True) - r = a.__floordiv__(b, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def divmod(self, a, b): - """Return (a // b, a % b). 
- - >>> ExtendedContext.divmod(Decimal(8), Decimal(3)) - (Decimal('2'), Decimal('2')) - >>> ExtendedContext.divmod(Decimal(8), Decimal(4)) - (Decimal('2'), Decimal('0')) - >>> ExtendedContext.divmod(8, 4) - (Decimal('2'), Decimal('0')) - >>> ExtendedContext.divmod(Decimal(8), 4) - (Decimal('2'), Decimal('0')) - >>> ExtendedContext.divmod(8, Decimal(4)) - (Decimal('2'), Decimal('0')) - """ - a = _convert_other(a, raiseit=True) - r = a.__divmod__(b, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def exp(self, a): - """Returns e ** a. - - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.exp(Decimal('-Infinity')) - Decimal('0') - >>> c.exp(Decimal('-1')) - Decimal('0.367879441') - >>> c.exp(Decimal('0')) - Decimal('1') - >>> c.exp(Decimal('1')) - Decimal('2.71828183') - >>> c.exp(Decimal('0.693147181')) - Decimal('2.00000000') - >>> c.exp(Decimal('+Infinity')) - Decimal('Infinity') - >>> c.exp(10) - Decimal('22026.4658') - """ - a =_convert_other(a, raiseit=True) - return a.exp(context=self) - - def fma(self, a, b, c): - """Returns a multiplied by b, plus c. - - The first two operands are multiplied together, using multiply, - the third operand is then added to the result of that - multiplication, using add, all with only one final rounding. 
- - >>> ExtendedContext.fma(Decimal('3'), Decimal('5'), Decimal('7')) - Decimal('22') - >>> ExtendedContext.fma(Decimal('3'), Decimal('-5'), Decimal('7')) - Decimal('-8') - >>> ExtendedContext.fma(Decimal('888565290'), Decimal('1557.96930'), Decimal('-86087.7578')) - Decimal('1.38435736E+12') - >>> ExtendedContext.fma(1, 3, 4) - Decimal('7') - >>> ExtendedContext.fma(1, Decimal(3), 4) - Decimal('7') - >>> ExtendedContext.fma(1, 3, Decimal(4)) - Decimal('7') - """ - a = _convert_other(a, raiseit=True) - return a.fma(b, c, context=self) - - def is_canonical(self, a): - """Return True if the operand is canonical; otherwise return False. - - Currently, the encoding of a Decimal instance is always - canonical, so this method returns True for any Decimal. - - >>> ExtendedContext.is_canonical(Decimal('2.50')) - True - """ - return a.is_canonical() - - def is_finite(self, a): - """Return True if the operand is finite; otherwise return False. - - A Decimal instance is considered finite if it is neither - infinite nor a NaN. - - >>> ExtendedContext.is_finite(Decimal('2.50')) - True - >>> ExtendedContext.is_finite(Decimal('-0.3')) - True - >>> ExtendedContext.is_finite(Decimal('0')) - True - >>> ExtendedContext.is_finite(Decimal('Inf')) - False - >>> ExtendedContext.is_finite(Decimal('NaN')) - False - >>> ExtendedContext.is_finite(1) - True - """ - a = _convert_other(a, raiseit=True) - return a.is_finite() - - def is_infinite(self, a): - """Return True if the operand is infinite; otherwise return False. - - >>> ExtendedContext.is_infinite(Decimal('2.50')) - False - >>> ExtendedContext.is_infinite(Decimal('-Inf')) - True - >>> ExtendedContext.is_infinite(Decimal('NaN')) - False - >>> ExtendedContext.is_infinite(1) - False - """ - a = _convert_other(a, raiseit=True) - return a.is_infinite() - - def is_nan(self, a): - """Return True if the operand is a qNaN or sNaN; - otherwise return False. 
- - >>> ExtendedContext.is_nan(Decimal('2.50')) - False - >>> ExtendedContext.is_nan(Decimal('NaN')) - True - >>> ExtendedContext.is_nan(Decimal('-sNaN')) - True - >>> ExtendedContext.is_nan(1) - False - """ - a = _convert_other(a, raiseit=True) - return a.is_nan() - - def is_normal(self, a): - """Return True if the operand is a normal number; - otherwise return False. - - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.is_normal(Decimal('2.50')) - True - >>> c.is_normal(Decimal('0.1E-999')) - False - >>> c.is_normal(Decimal('0.00')) - False - >>> c.is_normal(Decimal('-Inf')) - False - >>> c.is_normal(Decimal('NaN')) - False - >>> c.is_normal(1) - True - """ - a = _convert_other(a, raiseit=True) - return a.is_normal(context=self) - - def is_qnan(self, a): - """Return True if the operand is a quiet NaN; otherwise return False. - - >>> ExtendedContext.is_qnan(Decimal('2.50')) - False - >>> ExtendedContext.is_qnan(Decimal('NaN')) - True - >>> ExtendedContext.is_qnan(Decimal('sNaN')) - False - >>> ExtendedContext.is_qnan(1) - False - """ - a = _convert_other(a, raiseit=True) - return a.is_qnan() - - def is_signed(self, a): - """Return True if the operand is negative; otherwise return False. - - >>> ExtendedContext.is_signed(Decimal('2.50')) - False - >>> ExtendedContext.is_signed(Decimal('-12')) - True - >>> ExtendedContext.is_signed(Decimal('-0')) - True - >>> ExtendedContext.is_signed(8) - False - >>> ExtendedContext.is_signed(-8) - True - """ - a = _convert_other(a, raiseit=True) - return a.is_signed() - - def is_snan(self, a): - """Return True if the operand is a signaling NaN; - otherwise return False. 
- - >>> ExtendedContext.is_snan(Decimal('2.50')) - False - >>> ExtendedContext.is_snan(Decimal('NaN')) - False - >>> ExtendedContext.is_snan(Decimal('sNaN')) - True - >>> ExtendedContext.is_snan(1) - False - """ - a = _convert_other(a, raiseit=True) - return a.is_snan() - - def is_subnormal(self, a): - """Return True if the operand is subnormal; otherwise return False. - - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.is_subnormal(Decimal('2.50')) - False - >>> c.is_subnormal(Decimal('0.1E-999')) - True - >>> c.is_subnormal(Decimal('0.00')) - False - >>> c.is_subnormal(Decimal('-Inf')) - False - >>> c.is_subnormal(Decimal('NaN')) - False - >>> c.is_subnormal(1) - False - """ - a = _convert_other(a, raiseit=True) - return a.is_subnormal(context=self) - - def is_zero(self, a): - """Return True if the operand is a zero; otherwise return False. - - >>> ExtendedContext.is_zero(Decimal('0')) - True - >>> ExtendedContext.is_zero(Decimal('2.50')) - False - >>> ExtendedContext.is_zero(Decimal('-0E+2')) - True - >>> ExtendedContext.is_zero(1) - False - >>> ExtendedContext.is_zero(0) - True - """ - a = _convert_other(a, raiseit=True) - return a.is_zero() - - def ln(self, a): - """Returns the natural (base e) logarithm of the operand. - - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.ln(Decimal('0')) - Decimal('-Infinity') - >>> c.ln(Decimal('1.000')) - Decimal('0') - >>> c.ln(Decimal('2.71828183')) - Decimal('1.00000000') - >>> c.ln(Decimal('10')) - Decimal('2.30258509') - >>> c.ln(Decimal('+Infinity')) - Decimal('Infinity') - >>> c.ln(1) - Decimal('0') - """ - a = _convert_other(a, raiseit=True) - return a.ln(context=self) - - def log10(self, a): - """Returns the base 10 logarithm of the operand. 
- - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.log10(Decimal('0')) - Decimal('-Infinity') - >>> c.log10(Decimal('0.001')) - Decimal('-3') - >>> c.log10(Decimal('1.000')) - Decimal('0') - >>> c.log10(Decimal('2')) - Decimal('0.301029996') - >>> c.log10(Decimal('10')) - Decimal('1') - >>> c.log10(Decimal('70')) - Decimal('1.84509804') - >>> c.log10(Decimal('+Infinity')) - Decimal('Infinity') - >>> c.log10(0) - Decimal('-Infinity') - >>> c.log10(1) - Decimal('0') - """ - a = _convert_other(a, raiseit=True) - return a.log10(context=self) - - def logb(self, a): - """ Returns the exponent of the magnitude of the operand's MSD. - - The result is the integer which is the exponent of the magnitude - of the most significant digit of the operand (as though the - operand were truncated to a single digit while maintaining the - value of that digit and without limiting the resulting exponent). - - >>> ExtendedContext.logb(Decimal('250')) - Decimal('2') - >>> ExtendedContext.logb(Decimal('2.50')) - Decimal('0') - >>> ExtendedContext.logb(Decimal('0.03')) - Decimal('-2') - >>> ExtendedContext.logb(Decimal('0')) - Decimal('-Infinity') - >>> ExtendedContext.logb(1) - Decimal('0') - >>> ExtendedContext.logb(10) - Decimal('1') - >>> ExtendedContext.logb(100) - Decimal('2') - """ - a = _convert_other(a, raiseit=True) - return a.logb(context=self) - - def logical_and(self, a, b): - """Applies the logical operation 'and' between each operand's digits. - - The operands must be both logical numbers. 
- - >>> ExtendedContext.logical_and(Decimal('0'), Decimal('0')) - Decimal('0') - >>> ExtendedContext.logical_and(Decimal('0'), Decimal('1')) - Decimal('0') - >>> ExtendedContext.logical_and(Decimal('1'), Decimal('0')) - Decimal('0') - >>> ExtendedContext.logical_and(Decimal('1'), Decimal('1')) - Decimal('1') - >>> ExtendedContext.logical_and(Decimal('1100'), Decimal('1010')) - Decimal('1000') - >>> ExtendedContext.logical_and(Decimal('1111'), Decimal('10')) - Decimal('10') - >>> ExtendedContext.logical_and(110, 1101) - Decimal('100') - >>> ExtendedContext.logical_and(Decimal(110), 1101) - Decimal('100') - >>> ExtendedContext.logical_and(110, Decimal(1101)) - Decimal('100') - """ - a = _convert_other(a, raiseit=True) - return a.logical_and(b, context=self) - - def logical_invert(self, a): - """Invert all the digits in the operand. - - The operand must be a logical number. - - >>> ExtendedContext.logical_invert(Decimal('0')) - Decimal('111111111') - >>> ExtendedContext.logical_invert(Decimal('1')) - Decimal('111111110') - >>> ExtendedContext.logical_invert(Decimal('111111111')) - Decimal('0') - >>> ExtendedContext.logical_invert(Decimal('101010101')) - Decimal('10101010') - >>> ExtendedContext.logical_invert(1101) - Decimal('111110010') - """ - a = _convert_other(a, raiseit=True) - return a.logical_invert(context=self) - - def logical_or(self, a, b): - """Applies the logical operation 'or' between each operand's digits. - - The operands must be both logical numbers. 
- - >>> ExtendedContext.logical_or(Decimal('0'), Decimal('0')) - Decimal('0') - >>> ExtendedContext.logical_or(Decimal('0'), Decimal('1')) - Decimal('1') - >>> ExtendedContext.logical_or(Decimal('1'), Decimal('0')) - Decimal('1') - >>> ExtendedContext.logical_or(Decimal('1'), Decimal('1')) - Decimal('1') - >>> ExtendedContext.logical_or(Decimal('1100'), Decimal('1010')) - Decimal('1110') - >>> ExtendedContext.logical_or(Decimal('1110'), Decimal('10')) - Decimal('1110') - >>> ExtendedContext.logical_or(110, 1101) - Decimal('1111') - >>> ExtendedContext.logical_or(Decimal(110), 1101) - Decimal('1111') - >>> ExtendedContext.logical_or(110, Decimal(1101)) - Decimal('1111') - """ - a = _convert_other(a, raiseit=True) - return a.logical_or(b, context=self) - - def logical_xor(self, a, b): - """Applies the logical operation 'xor' between each operand's digits. - - The operands must be both logical numbers. - - >>> ExtendedContext.logical_xor(Decimal('0'), Decimal('0')) - Decimal('0') - >>> ExtendedContext.logical_xor(Decimal('0'), Decimal('1')) - Decimal('1') - >>> ExtendedContext.logical_xor(Decimal('1'), Decimal('0')) - Decimal('1') - >>> ExtendedContext.logical_xor(Decimal('1'), Decimal('1')) - Decimal('0') - >>> ExtendedContext.logical_xor(Decimal('1100'), Decimal('1010')) - Decimal('110') - >>> ExtendedContext.logical_xor(Decimal('1111'), Decimal('10')) - Decimal('1101') - >>> ExtendedContext.logical_xor(110, 1101) - Decimal('1011') - >>> ExtendedContext.logical_xor(Decimal(110), 1101) - Decimal('1011') - >>> ExtendedContext.logical_xor(110, Decimal(1101)) - Decimal('1011') - """ - a = _convert_other(a, raiseit=True) - return a.logical_xor(b, context=self) - - def max(self, a, b): - """max compares two values numerically and returns the maximum. - - If either operand is a NaN then the general rules apply. - Otherwise, the operands are compared as though by the compare - operation. If they are numerically equal then the left-hand operand - is chosen as the result. 
Otherwise the maximum (closer to positive - infinity) of the two operands is chosen as the result. - - >>> ExtendedContext.max(Decimal('3'), Decimal('2')) - Decimal('3') - >>> ExtendedContext.max(Decimal('-10'), Decimal('3')) - Decimal('3') - >>> ExtendedContext.max(Decimal('1.0'), Decimal('1')) - Decimal('1') - >>> ExtendedContext.max(Decimal('7'), Decimal('NaN')) - Decimal('7') - >>> ExtendedContext.max(1, 2) - Decimal('2') - >>> ExtendedContext.max(Decimal(1), 2) - Decimal('2') - >>> ExtendedContext.max(1, Decimal(2)) - Decimal('2') - """ - a = _convert_other(a, raiseit=True) - return a.max(b, context=self) - - def max_mag(self, a, b): - """Compares the values numerically with their sign ignored. - - >>> ExtendedContext.max_mag(Decimal('7'), Decimal('NaN')) - Decimal('7') - >>> ExtendedContext.max_mag(Decimal('7'), Decimal('-10')) - Decimal('-10') - >>> ExtendedContext.max_mag(1, -2) - Decimal('-2') - >>> ExtendedContext.max_mag(Decimal(1), -2) - Decimal('-2') - >>> ExtendedContext.max_mag(1, Decimal(-2)) - Decimal('-2') - """ - a = _convert_other(a, raiseit=True) - return a.max_mag(b, context=self) - - def min(self, a, b): - """min compares two values numerically and returns the minimum. - - If either operand is a NaN then the general rules apply. - Otherwise, the operands are compared as though by the compare - operation. If they are numerically equal then the left-hand operand - is chosen as the result. Otherwise the minimum (closer to negative - infinity) of the two operands is chosen as the result. 
- - >>> ExtendedContext.min(Decimal('3'), Decimal('2')) - Decimal('2') - >>> ExtendedContext.min(Decimal('-10'), Decimal('3')) - Decimal('-10') - >>> ExtendedContext.min(Decimal('1.0'), Decimal('1')) - Decimal('1.0') - >>> ExtendedContext.min(Decimal('7'), Decimal('NaN')) - Decimal('7') - >>> ExtendedContext.min(1, 2) - Decimal('1') - >>> ExtendedContext.min(Decimal(1), 2) - Decimal('1') - >>> ExtendedContext.min(1, Decimal(29)) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - return a.min(b, context=self) - - def min_mag(self, a, b): - """Compares the values numerically with their sign ignored. - - >>> ExtendedContext.min_mag(Decimal('3'), Decimal('-2')) - Decimal('-2') - >>> ExtendedContext.min_mag(Decimal('-3'), Decimal('NaN')) - Decimal('-3') - >>> ExtendedContext.min_mag(1, -2) - Decimal('1') - >>> ExtendedContext.min_mag(Decimal(1), -2) - Decimal('1') - >>> ExtendedContext.min_mag(1, Decimal(-2)) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - return a.min_mag(b, context=self) - - def minus(self, a): - """Minus corresponds to unary prefix minus in Python. - - The operation is evaluated using the same rules as subtract; the - operation minus(a) is calculated as subtract('0', a) where the '0' - has the same exponent as the operand. - - >>> ExtendedContext.minus(Decimal('1.3')) - Decimal('-1.3') - >>> ExtendedContext.minus(Decimal('-1.3')) - Decimal('1.3') - >>> ExtendedContext.minus(1) - Decimal('-1') - """ - a = _convert_other(a, raiseit=True) - return a.__neg__(context=self) - - def multiply(self, a, b): - """multiply multiplies two operands. - - If either operand is a special value then the general rules apply. - Otherwise, the operands are multiplied together - ('long multiplication'), resulting in a number which may be as long as - the sum of the lengths of the two operands. 
- - >>> ExtendedContext.multiply(Decimal('1.20'), Decimal('3')) - Decimal('3.60') - >>> ExtendedContext.multiply(Decimal('7'), Decimal('3')) - Decimal('21') - >>> ExtendedContext.multiply(Decimal('0.9'), Decimal('0.8')) - Decimal('0.72') - >>> ExtendedContext.multiply(Decimal('0.9'), Decimal('-0')) - Decimal('-0.0') - >>> ExtendedContext.multiply(Decimal('654321'), Decimal('654321')) - Decimal('4.28135971E+11') - >>> ExtendedContext.multiply(7, 7) - Decimal('49') - >>> ExtendedContext.multiply(Decimal(7), 7) - Decimal('49') - >>> ExtendedContext.multiply(7, Decimal(7)) - Decimal('49') - """ - a = _convert_other(a, raiseit=True) - r = a.__mul__(b, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def next_minus(self, a): - """Returns the largest representable number smaller than a. - - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> ExtendedContext.next_minus(Decimal('1')) - Decimal('0.999999999') - >>> c.next_minus(Decimal('1E-1007')) - Decimal('0E-1007') - >>> ExtendedContext.next_minus(Decimal('-1.00000003')) - Decimal('-1.00000004') - >>> c.next_minus(Decimal('Infinity')) - Decimal('9.99999999E+999') - >>> c.next_minus(1) - Decimal('0.999999999') - """ - a = _convert_other(a, raiseit=True) - return a.next_minus(context=self) - - def next_plus(self, a): - """Returns the smallest representable number larger than a. 
- - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> ExtendedContext.next_plus(Decimal('1')) - Decimal('1.00000001') - >>> c.next_plus(Decimal('-1E-1007')) - Decimal('-0E-1007') - >>> ExtendedContext.next_plus(Decimal('-1.00000003')) - Decimal('-1.00000002') - >>> c.next_plus(Decimal('-Infinity')) - Decimal('-9.99999999E+999') - >>> c.next_plus(1) - Decimal('1.00000001') - """ - a = _convert_other(a, raiseit=True) - return a.next_plus(context=self) - - def next_toward(self, a, b): - """Returns the number closest to a, in direction towards b. - - The result is the closest representable number from the first - operand (but not the first operand) that is in the direction - towards the second operand, unless the operands have the same - value. - - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.next_toward(Decimal('1'), Decimal('2')) - Decimal('1.00000001') - >>> c.next_toward(Decimal('-1E-1007'), Decimal('1')) - Decimal('-0E-1007') - >>> c.next_toward(Decimal('-1.00000003'), Decimal('0')) - Decimal('-1.00000002') - >>> c.next_toward(Decimal('1'), Decimal('0')) - Decimal('0.999999999') - >>> c.next_toward(Decimal('1E-1007'), Decimal('-100')) - Decimal('0E-1007') - >>> c.next_toward(Decimal('-1.00000003'), Decimal('-10')) - Decimal('-1.00000004') - >>> c.next_toward(Decimal('0.00'), Decimal('-0.0000')) - Decimal('-0.00') - >>> c.next_toward(0, 1) - Decimal('1E-1007') - >>> c.next_toward(Decimal(0), 1) - Decimal('1E-1007') - >>> c.next_toward(0, Decimal(1)) - Decimal('1E-1007') - """ - a = _convert_other(a, raiseit=True) - return a.next_toward(b, context=self) - - def normalize(self, a): - """normalize reduces an operand to its simplest form. - - Essentially a plus operation with all trailing zeros removed from the - result. 
- - >>> ExtendedContext.normalize(Decimal('2.1')) - Decimal('2.1') - >>> ExtendedContext.normalize(Decimal('-2.0')) - Decimal('-2') - >>> ExtendedContext.normalize(Decimal('1.200')) - Decimal('1.2') - >>> ExtendedContext.normalize(Decimal('-120')) - Decimal('-1.2E+2') - >>> ExtendedContext.normalize(Decimal('120.00')) - Decimal('1.2E+2') - >>> ExtendedContext.normalize(Decimal('0.00')) - Decimal('0') - >>> ExtendedContext.normalize(6) - Decimal('6') - """ - a = _convert_other(a, raiseit=True) - return a.normalize(context=self) - - def number_class(self, a): - """Returns an indication of the class of the operand. - - The class is one of the following strings: - -sNaN - -NaN - -Infinity - -Normal - -Subnormal - -Zero - +Zero - +Subnormal - +Normal - +Infinity - - >>> c = Context(ExtendedContext) - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.number_class(Decimal('Infinity')) - '+Infinity' - >>> c.number_class(Decimal('1E-10')) - '+Normal' - >>> c.number_class(Decimal('2.50')) - '+Normal' - >>> c.number_class(Decimal('0.1E-999')) - '+Subnormal' - >>> c.number_class(Decimal('0')) - '+Zero' - >>> c.number_class(Decimal('-0')) - '-Zero' - >>> c.number_class(Decimal('-0.1E-999')) - '-Subnormal' - >>> c.number_class(Decimal('-1E-10')) - '-Normal' - >>> c.number_class(Decimal('-2.50')) - '-Normal' - >>> c.number_class(Decimal('-Infinity')) - '-Infinity' - >>> c.number_class(Decimal('NaN')) - 'NaN' - >>> c.number_class(Decimal('-NaN')) - 'NaN' - >>> c.number_class(Decimal('sNaN')) - 'sNaN' - >>> c.number_class(123) - '+Normal' - """ - a = _convert_other(a, raiseit=True) - return a.number_class(context=self) - - def plus(self, a): - """Plus corresponds to unary prefix plus in Python. - - The operation is evaluated using the same rules as add; the - operation plus(a) is calculated as add('0', a) where the '0' - has the same exponent as the operand. 
- - >>> ExtendedContext.plus(Decimal('1.3')) - Decimal('1.3') - >>> ExtendedContext.plus(Decimal('-1.3')) - Decimal('-1.3') - >>> ExtendedContext.plus(-1) - Decimal('-1') - """ - a = _convert_other(a, raiseit=True) - return a.__pos__(context=self) - - def power(self, a, b, modulo=None): - """Raises a to the power of b, to modulo if given. - - With two arguments, compute a**b. If a is negative then b - must be integral. The result will be inexact unless b is - integral and the result is finite and can be expressed exactly - in 'precision' digits. - - With three arguments, compute (a**b) % modulo. For the - three argument form, the following restrictions on the - arguments hold: - - - all three arguments must be integral - - b must be nonnegative - - at least one of a or b must be nonzero - - modulo must be nonzero and have at most 'precision' digits - - The result of pow(a, b, modulo) is identical to the result - that would be obtained by computing (a**b) % modulo with - unbounded precision, but is computed more efficiently. It is - always exact. 
- - >>> c = ExtendedContext.copy() - >>> c.Emin = -999 - >>> c.Emax = 999 - >>> c.power(Decimal('2'), Decimal('3')) - Decimal('8') - >>> c.power(Decimal('-2'), Decimal('3')) - Decimal('-8') - >>> c.power(Decimal('2'), Decimal('-3')) - Decimal('0.125') - >>> c.power(Decimal('1.7'), Decimal('8')) - Decimal('69.7575744') - >>> c.power(Decimal('10'), Decimal('0.301029996')) - Decimal('2.00000000') - >>> c.power(Decimal('Infinity'), Decimal('-1')) - Decimal('0') - >>> c.power(Decimal('Infinity'), Decimal('0')) - Decimal('1') - >>> c.power(Decimal('Infinity'), Decimal('1')) - Decimal('Infinity') - >>> c.power(Decimal('-Infinity'), Decimal('-1')) - Decimal('-0') - >>> c.power(Decimal('-Infinity'), Decimal('0')) - Decimal('1') - >>> c.power(Decimal('-Infinity'), Decimal('1')) - Decimal('-Infinity') - >>> c.power(Decimal('-Infinity'), Decimal('2')) - Decimal('Infinity') - >>> c.power(Decimal('0'), Decimal('0')) - Decimal('NaN') - - >>> c.power(Decimal('3'), Decimal('7'), Decimal('16')) - Decimal('11') - >>> c.power(Decimal('-3'), Decimal('7'), Decimal('16')) - Decimal('-11') - >>> c.power(Decimal('-3'), Decimal('8'), Decimal('16')) - Decimal('1') - >>> c.power(Decimal('3'), Decimal('7'), Decimal('-16')) - Decimal('11') - >>> c.power(Decimal('23E12345'), Decimal('67E189'), Decimal('123456789')) - Decimal('11729830') - >>> c.power(Decimal('-0'), Decimal('17'), Decimal('1729')) - Decimal('-0') - >>> c.power(Decimal('-23'), Decimal('0'), Decimal('65537')) - Decimal('1') - >>> ExtendedContext.power(7, 7) - Decimal('823543') - >>> ExtendedContext.power(Decimal(7), 7) - Decimal('823543') - >>> ExtendedContext.power(7, Decimal(7), 2) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - r = a.__pow__(b, modulo, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def quantize(self, a, b): - """Returns a value equal to 'a' (rounded), having the exponent of 'b'. 
- - The coefficient of the result is derived from that of the left-hand - operand. It may be rounded using the current rounding setting (if the - exponent is being increased), multiplied by a positive power of ten (if - the exponent is being decreased), or is unchanged (if the exponent is - already equal to that of the right-hand operand). - - Unlike other operations, if the length of the coefficient after the - quantize operation would be greater than precision then an Invalid - operation condition is raised. This guarantees that, unless there is - an error condition, the exponent of the result of a quantize is always - equal to that of the right-hand operand. - - Also unlike other operations, quantize will never raise Underflow, even - if the result is subnormal and inexact. - - >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.001')) - Decimal('2.170') - >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.01')) - Decimal('2.17') - >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.1')) - Decimal('2.2') - >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('1e+0')) - Decimal('2') - >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('1e+1')) - Decimal('0E+1') - >>> ExtendedContext.quantize(Decimal('-Inf'), Decimal('Infinity')) - Decimal('-Infinity') - >>> ExtendedContext.quantize(Decimal('2'), Decimal('Infinity')) - Decimal('NaN') - >>> ExtendedContext.quantize(Decimal('-0.1'), Decimal('1')) - Decimal('-0') - >>> ExtendedContext.quantize(Decimal('-0'), Decimal('1e+5')) - Decimal('-0E+5') - >>> ExtendedContext.quantize(Decimal('+35236450.6'), Decimal('1e-2')) - Decimal('NaN') - >>> ExtendedContext.quantize(Decimal('-35236450.6'), Decimal('1e-2')) - Decimal('NaN') - >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e-1')) - Decimal('217.0') - >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e-0')) - Decimal('217') - >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e+1')) - Decimal('2.2E+2') - >>> 
ExtendedContext.quantize(Decimal('217'), Decimal('1e+2')) - Decimal('2E+2') - >>> ExtendedContext.quantize(1, 2) - Decimal('1') - >>> ExtendedContext.quantize(Decimal(1), 2) - Decimal('1') - >>> ExtendedContext.quantize(1, Decimal(2)) - Decimal('1') - """ - a = _convert_other(a, raiseit=True) - return a.quantize(b, context=self) - - def radix(self): - """Just returns 10, as this is Decimal, :) - - >>> ExtendedContext.radix() - Decimal('10') - """ - return Decimal(10) - - def remainder(self, a, b): - """Returns the remainder from integer division. - - The result is the residue of the dividend after the operation of - calculating integer division as described for divide-integer, rounded - to precision digits if necessary. The sign of the result, if - non-zero, is the same as that of the original dividend. - - This operation will fail under the same conditions as integer division - (that is, if integer division on the same two operands would fail, the - remainder cannot be calculated). - - >>> ExtendedContext.remainder(Decimal('2.1'), Decimal('3')) - Decimal('2.1') - >>> ExtendedContext.remainder(Decimal('10'), Decimal('3')) - Decimal('1') - >>> ExtendedContext.remainder(Decimal('-10'), Decimal('3')) - Decimal('-1') - >>> ExtendedContext.remainder(Decimal('10.2'), Decimal('1')) - Decimal('0.2') - >>> ExtendedContext.remainder(Decimal('10'), Decimal('0.3')) - Decimal('0.1') - >>> ExtendedContext.remainder(Decimal('3.6'), Decimal('1.3')) - Decimal('1.0') - >>> ExtendedContext.remainder(22, 6) - Decimal('4') - >>> ExtendedContext.remainder(Decimal(22), 6) - Decimal('4') - >>> ExtendedContext.remainder(22, Decimal(6)) - Decimal('4') - """ - a = _convert_other(a, raiseit=True) - r = a.__mod__(b, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def remainder_near(self, a, b): - """Returns to be "a - b * n", where n is the integer nearest the exact - value of "x / b" (if two integers are equally near then 
the even one - is chosen). If the result is equal to 0 then its sign will be the - sign of a. - - This operation will fail under the same conditions as integer division - (that is, if integer division on the same two operands would fail, the - remainder cannot be calculated). - - >>> ExtendedContext.remainder_near(Decimal('2.1'), Decimal('3')) - Decimal('-0.9') - >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('6')) - Decimal('-2') - >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('3')) - Decimal('1') - >>> ExtendedContext.remainder_near(Decimal('-10'), Decimal('3')) - Decimal('-1') - >>> ExtendedContext.remainder_near(Decimal('10.2'), Decimal('1')) - Decimal('0.2') - >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('0.3')) - Decimal('0.1') - >>> ExtendedContext.remainder_near(Decimal('3.6'), Decimal('1.3')) - Decimal('-0.3') - >>> ExtendedContext.remainder_near(3, 11) - Decimal('3') - >>> ExtendedContext.remainder_near(Decimal(3), 11) - Decimal('3') - >>> ExtendedContext.remainder_near(3, Decimal(11)) - Decimal('3') - """ - a = _convert_other(a, raiseit=True) - return a.remainder_near(b, context=self) - - def rotate(self, a, b): - """Returns a rotated copy of a, b times. - - The coefficient of the result is a rotated copy of the digits in - the coefficient of the first operand. The number of places of - rotation is taken from the absolute value of the second operand, - with the rotation being to the left if the second operand is - positive or to the right otherwise. 
- - >>> ExtendedContext.rotate(Decimal('34'), Decimal('8')) - Decimal('400000003') - >>> ExtendedContext.rotate(Decimal('12'), Decimal('9')) - Decimal('12') - >>> ExtendedContext.rotate(Decimal('123456789'), Decimal('-2')) - Decimal('891234567') - >>> ExtendedContext.rotate(Decimal('123456789'), Decimal('0')) - Decimal('123456789') - >>> ExtendedContext.rotate(Decimal('123456789'), Decimal('+2')) - Decimal('345678912') - >>> ExtendedContext.rotate(1333333, 1) - Decimal('13333330') - >>> ExtendedContext.rotate(Decimal(1333333), 1) - Decimal('13333330') - >>> ExtendedContext.rotate(1333333, Decimal(1)) - Decimal('13333330') - """ - a = _convert_other(a, raiseit=True) - return a.rotate(b, context=self) - - def same_quantum(self, a, b): - """Returns True if the two operands have the same exponent. - - The result is never affected by either the sign or the coefficient of - either operand. - - >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('0.001')) - False - >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('0.01')) - True - >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('1')) - False - >>> ExtendedContext.same_quantum(Decimal('Inf'), Decimal('-Inf')) - True - >>> ExtendedContext.same_quantum(10000, -1) - True - >>> ExtendedContext.same_quantum(Decimal(10000), -1) - True - >>> ExtendedContext.same_quantum(10000, Decimal(-1)) - True - """ - a = _convert_other(a, raiseit=True) - return a.same_quantum(b) - - def scaleb (self, a, b): - """Returns the first operand after adding the second value its exp. 
- - >>> ExtendedContext.scaleb(Decimal('7.50'), Decimal('-2')) - Decimal('0.0750') - >>> ExtendedContext.scaleb(Decimal('7.50'), Decimal('0')) - Decimal('7.50') - >>> ExtendedContext.scaleb(Decimal('7.50'), Decimal('3')) - Decimal('7.50E+3') - >>> ExtendedContext.scaleb(1, 4) - Decimal('1E+4') - >>> ExtendedContext.scaleb(Decimal(1), 4) - Decimal('1E+4') - >>> ExtendedContext.scaleb(1, Decimal(4)) - Decimal('1E+4') - """ - a = _convert_other(a, raiseit=True) - return a.scaleb(b, context=self) - - def shift(self, a, b): - """Returns a shifted copy of a, b times. - - The coefficient of the result is a shifted copy of the digits - in the coefficient of the first operand. The number of places - to shift is taken from the absolute value of the second operand, - with the shift being to the left if the second operand is - positive or to the right otherwise. Digits shifted into the - coefficient are zeros. - - >>> ExtendedContext.shift(Decimal('34'), Decimal('8')) - Decimal('400000000') - >>> ExtendedContext.shift(Decimal('12'), Decimal('9')) - Decimal('0') - >>> ExtendedContext.shift(Decimal('123456789'), Decimal('-2')) - Decimal('1234567') - >>> ExtendedContext.shift(Decimal('123456789'), Decimal('0')) - Decimal('123456789') - >>> ExtendedContext.shift(Decimal('123456789'), Decimal('+2')) - Decimal('345678900') - >>> ExtendedContext.shift(88888888, 2) - Decimal('888888800') - >>> ExtendedContext.shift(Decimal(88888888), 2) - Decimal('888888800') - >>> ExtendedContext.shift(88888888, Decimal(2)) - Decimal('888888800') - """ - a = _convert_other(a, raiseit=True) - return a.shift(b, context=self) - - def sqrt(self, a): - """Square root of a non-negative number to context precision. - - If the result must be inexact, it is rounded using the round-half-even - algorithm. 
- - >>> ExtendedContext.sqrt(Decimal('0')) - Decimal('0') - >>> ExtendedContext.sqrt(Decimal('-0')) - Decimal('-0') - >>> ExtendedContext.sqrt(Decimal('0.39')) - Decimal('0.624499800') - >>> ExtendedContext.sqrt(Decimal('100')) - Decimal('10') - >>> ExtendedContext.sqrt(Decimal('1')) - Decimal('1') - >>> ExtendedContext.sqrt(Decimal('1.0')) - Decimal('1.0') - >>> ExtendedContext.sqrt(Decimal('1.00')) - Decimal('1.0') - >>> ExtendedContext.sqrt(Decimal('7')) - Decimal('2.64575131') - >>> ExtendedContext.sqrt(Decimal('10')) - Decimal('3.16227766') - >>> ExtendedContext.sqrt(2) - Decimal('1.41421356') - >>> ExtendedContext.prec - 9 - """ - a = _convert_other(a, raiseit=True) - return a.sqrt(context=self) - - def subtract(self, a, b): - """Return the difference between the two operands. - - >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('1.07')) - Decimal('0.23') - >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('1.30')) - Decimal('0.00') - >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('2.07')) - Decimal('-0.77') - >>> ExtendedContext.subtract(8, 5) - Decimal('3') - >>> ExtendedContext.subtract(Decimal(8), 5) - Decimal('3') - >>> ExtendedContext.subtract(8, Decimal(5)) - Decimal('3') - """ - a = _convert_other(a, raiseit=True) - r = a.__sub__(b, context=self) - if r is NotImplemented: - raise TypeError("Unable to convert %s to Decimal" % b) - else: - return r - - def to_eng_string(self, a): - """Convert to a string, using engineering notation if an exponent is needed. - - Engineering notation has an exponent which is a multiple of 3. This - can leave up to 3 digits to the left of the decimal place and may - require the addition of either one or two trailing zeros. - - The operation is not affected by the context. 
- - >>> ExtendedContext.to_eng_string(Decimal('123E+1')) - '1.23E+3' - >>> ExtendedContext.to_eng_string(Decimal('123E+3')) - '123E+3' - >>> ExtendedContext.to_eng_string(Decimal('123E-10')) - '12.3E-9' - >>> ExtendedContext.to_eng_string(Decimal('-123E-12')) - '-123E-12' - >>> ExtendedContext.to_eng_string(Decimal('7E-7')) - '700E-9' - >>> ExtendedContext.to_eng_string(Decimal('7E+1')) - '70' - >>> ExtendedContext.to_eng_string(Decimal('0E+1')) - '0.00E+3' - - """ - a = _convert_other(a, raiseit=True) - return a.to_eng_string(context=self) - - def to_sci_string(self, a): - """Converts a number to a string, using scientific notation. - - The operation is not affected by the context. - """ - a = _convert_other(a, raiseit=True) - return a.__str__(context=self) - - def to_integral_exact(self, a): - """Rounds to an integer. - - When the operand has a negative exponent, the result is the same - as using the quantize() operation using the given operand as the - left-hand-operand, 1E+0 as the right-hand-operand, and the precision - of the operand as the precision setting; Inexact and Rounded flags - are allowed in this operation. The rounding mode is taken from the - context. - - >>> ExtendedContext.to_integral_exact(Decimal('2.1')) - Decimal('2') - >>> ExtendedContext.to_integral_exact(Decimal('100')) - Decimal('100') - >>> ExtendedContext.to_integral_exact(Decimal('100.0')) - Decimal('100') - >>> ExtendedContext.to_integral_exact(Decimal('101.5')) - Decimal('102') - >>> ExtendedContext.to_integral_exact(Decimal('-101.5')) - Decimal('-102') - >>> ExtendedContext.to_integral_exact(Decimal('10E+5')) - Decimal('1.0E+6') - >>> ExtendedContext.to_integral_exact(Decimal('7.89E+77')) - Decimal('7.89E+77') - >>> ExtendedContext.to_integral_exact(Decimal('-Inf')) - Decimal('-Infinity') - """ - a = _convert_other(a, raiseit=True) - return a.to_integral_exact(context=self) - - def to_integral_value(self, a): - """Rounds to an integer. 
- - When the operand has a negative exponent, the result is the same - as using the quantize() operation using the given operand as the - left-hand-operand, 1E+0 as the right-hand-operand, and the precision - of the operand as the precision setting, except that no flags will - be set. The rounding mode is taken from the context. - - >>> ExtendedContext.to_integral_value(Decimal('2.1')) - Decimal('2') - >>> ExtendedContext.to_integral_value(Decimal('100')) - Decimal('100') - >>> ExtendedContext.to_integral_value(Decimal('100.0')) - Decimal('100') - >>> ExtendedContext.to_integral_value(Decimal('101.5')) - Decimal('102') - >>> ExtendedContext.to_integral_value(Decimal('-101.5')) - Decimal('-102') - >>> ExtendedContext.to_integral_value(Decimal('10E+5')) - Decimal('1.0E+6') - >>> ExtendedContext.to_integral_value(Decimal('7.89E+77')) - Decimal('7.89E+77') - >>> ExtendedContext.to_integral_value(Decimal('-Inf')) - Decimal('-Infinity') - """ - a = _convert_other(a, raiseit=True) - return a.to_integral_value(context=self) - - # the method name changed, but we provide also the old one, for compatibility - to_integral = to_integral_value - -class _WorkRep(object): - __slots__ = ('sign','int','exp') - # sign: 0 or 1 - # int: int or long - # exp: None, int, or string - - def __init__(self, value=None): - if value is None: - self.sign = None - self.int = 0 - self.exp = None - elif isinstance(value, Decimal): - self.sign = value._sign - self.int = int(value._int) - self.exp = value._exp - else: - # assert isinstance(value, tuple) - self.sign = value[0] - self.int = value[1] - self.exp = value[2] - - def __repr__(self): - return "(%r, %r, %r)" % (self.sign, self.int, self.exp) - - __str__ = __repr__ - - - -def _normalize(op1, op2, prec = 0): - """Normalizes op1, op2 to have the same exp and length of coefficient. - - Done during addition. 
- """ - if op1.exp < op2.exp: - tmp = op2 - other = op1 - else: - tmp = op1 - other = op2 - - # Let exp = min(tmp.exp - 1, tmp.adjusted() - precision - 1). - # Then adding 10**exp to tmp has the same effect (after rounding) - # as adding any positive quantity smaller than 10**exp; similarly - # for subtraction. So if other is smaller than 10**exp we replace - # it with 10**exp. This avoids tmp.exp - other.exp getting too large. - tmp_len = len(str(tmp.int)) - other_len = len(str(other.int)) - exp = tmp.exp + min(-1, tmp_len - prec - 2) - if other_len + other.exp - 1 < exp: - other.int = 1 - other.exp = exp - - tmp.int *= 10 ** (tmp.exp - other.exp) - tmp.exp = other.exp - return op1, op2 - -##### Integer arithmetic functions used by ln, log10, exp and __pow__ ##### - -# This function from Tim Peters was taken from here: -# http://mail.python.org/pipermail/python-list/1999-July/007758.html -# The correction being in the function definition is for speed, and -# the whole function is not resolved with math.log because of avoiding -# the use of floats. -def _nbits(n, correction = { - '0': 4, '1': 3, '2': 2, '3': 2, - '4': 1, '5': 1, '6': 1, '7': 1, - '8': 0, '9': 0, 'a': 0, 'b': 0, - 'c': 0, 'd': 0, 'e': 0, 'f': 0}): - """Number of bits in binary representation of the positive integer n, - or 0 if n == 0. - """ - if n < 0: - raise ValueError("The argument to _nbits should be nonnegative.") - hex_n = "%x" % n - return 4*len(hex_n) - correction[hex_n[0]] - -def _decimal_lshift_exact(n, e): - """ Given integers n and e, return n * 10**e if it's an integer, else None. - - The computation is designed to avoid computing large powers of 10 - unnecessarily. - - >>> _decimal_lshift_exact(3, 4) - 30000 - >>> _decimal_lshift_exact(300, -999999999) # returns None - - """ - if n == 0: - return 0 - elif e >= 0: - return n * 10**e - else: - # val_n = largest power of 10 dividing n. 
- str_n = str(abs(n)) - val_n = len(str_n) - len(str_n.rstrip('0')) - return None if val_n < -e else n // 10**-e - -def _sqrt_nearest(n, a): - """Closest integer to the square root of the positive integer n. a is - an initial approximation to the square root. Any positive integer - will do for a, but the closer a is to the square root of n the - faster convergence will be. - - """ - if n <= 0 or a <= 0: - raise ValueError("Both arguments to _sqrt_nearest should be positive.") - - b=0 - while a != b: - b, a = a, a--n//a>>1 - return a - -def _rshift_nearest(x, shift): - """Given an integer x and a nonnegative integer shift, return closest - integer to x / 2**shift; use round-to-even in case of a tie. - - """ - b, q = 1L << shift, x >> shift - return q + (2*(x & (b-1)) + (q&1) > b) - -def _div_nearest(a, b): - """Closest integer to a/b, a and b positive integers; rounds to even - in the case of a tie. - - """ - q, r = divmod(a, b) - return q + (2*r + (q&1) > b) - -def _ilog(x, M, L = 8): - """Integer approximation to M*log(x/M), with absolute error boundable - in terms only of x/M. - - Given positive integers x and M, return an integer approximation to - M * log(x/M). For L = 8 and 0.1 <= x/M <= 10 the difference - between the approximation and the exact result is at most 22. For - L = 8 and 1.0 <= x/M <= 10.0 the difference is at most 15. In - both cases these are upper bounds on the error; it will usually be - much smaller.""" - - # The basic algorithm is the following: let log1p be the function - # log1p(x) = log(1+x). Then log(x/M) = log1p((x-M)/M). We use - # the reduction - # - # log1p(y) = 2*log1p(y/(1+sqrt(1+y))) - # - # repeatedly until the argument to log1p is small (< 2**-L in - # absolute value). For small y we can use the Taylor series - # expansion - # - # log1p(y) ~ y - y**2/2 + y**3/3 - ... - (-y)**T/T - # - # truncating at T such that y**T is small enough. 
The whole - # computation is carried out in a form of fixed-point arithmetic, - # with a real number z being represented by an integer - # approximation to z*M. To avoid loss of precision, the y below - # is actually an integer approximation to 2**R*y*M, where R is the - # number of reductions performed so far. - - y = x-M - # argument reduction; R = number of reductions performed - R = 0 - while (R <= L and long(abs(y)) << L-R >= M or - R > L and abs(y) >> R-L >= M): - y = _div_nearest(long(M*y) << 1, - M + _sqrt_nearest(M*(M+_rshift_nearest(y, R)), M)) - R += 1 - - # Taylor series with T terms - T = -int(-10*len(str(M))//(3*L)) - yshift = _rshift_nearest(y, R) - w = _div_nearest(M, T) - for k in xrange(T-1, 0, -1): - w = _div_nearest(M, k) - _div_nearest(yshift*w, M) - - return _div_nearest(w*y, M) - -def _dlog10(c, e, p): - """Given integers c, e and p with c > 0, p >= 0, compute an integer - approximation to 10**p * log10(c*10**e), with an absolute error of - at most 1. Assumes that c*10**e is not exactly 1.""" - - # increase precision by 2; compensate for this by dividing - # final result by 100 - p += 2 - - # write c*10**e as d*10**f with either: - # f >= 0 and 1 <= d <= 10, or - # f <= 0 and 0.1 <= d <= 1. - # Thus for c*10**e close to 1, f = 0 - l = len(str(c)) - f = e+l - (e+l >= 1) - - if p > 0: - M = 10**p - k = e+p-f - if k >= 0: - c *= 10**k - else: - c = _div_nearest(c, 10**-k) - - log_d = _ilog(c, M) # error < 5 + 22 = 27 - log_10 = _log10_digits(p) # error < 1 - log_d = _div_nearest(log_d*M, log_10) - log_tenpower = f*M # exact - else: - log_d = 0 # error < 2.31 - log_tenpower = _div_nearest(f, 10**-p) # error < 0.5 - - return _div_nearest(log_tenpower+log_d, 100) - -def _dlog(c, e, p): - """Given integers c, e and p with c > 0, compute an integer - approximation to 10**p * log(c*10**e), with an absolute error of - at most 1. Assumes that c*10**e is not exactly 1.""" - - # Increase precision by 2. 
The precision increase is compensated - # for at the end with a division by 100. - p += 2 - - # rewrite c*10**e as d*10**f with either f >= 0 and 1 <= d <= 10, - # or f <= 0 and 0.1 <= d <= 1. Then we can compute 10**p * log(c*10**e) - # as 10**p * log(d) + 10**p*f * log(10). - l = len(str(c)) - f = e+l - (e+l >= 1) - - # compute approximation to 10**p*log(d), with error < 27 - if p > 0: - k = e+p-f - if k >= 0: - c *= 10**k - else: - c = _div_nearest(c, 10**-k) # error of <= 0.5 in c - - # _ilog magnifies existing error in c by a factor of at most 10 - log_d = _ilog(c, 10**p) # error < 5 + 22 = 27 - else: - # p <= 0: just approximate the whole thing by 0; error < 2.31 - log_d = 0 - - # compute approximation to f*10**p*log(10), with error < 11. - if f: - extra = len(str(abs(f)))-1 - if p + extra >= 0: - # error in f * _log10_digits(p+extra) < |f| * 1 = |f| - # after division, error < |f|/10**extra + 0.5 < 10 + 0.5 < 11 - f_log_ten = _div_nearest(f*_log10_digits(p+extra), 10**extra) - else: - f_log_ten = 0 - else: - f_log_ten = 0 - - # error in sum < 11+27 = 38; error after division < 0.38 + 0.5 < 1 - return _div_nearest(f_log_ten + log_d, 100) - -class _Log10Memoize(object): - """Class to compute, store, and allow retrieval of, digits of the - constant log(10) = 2.302585.... This constant is needed by - Decimal.ln, Decimal.log10, Decimal.exp and Decimal.__pow__.""" - def __init__(self): - self.digits = "23025850929940456840179914546843642076011014886" - - def getdigits(self, p): - """Given an integer p >= 0, return floor(10**p)*log(10). - - For example, self.getdigits(3) returns 2302. - """ - # digits are stored as a string, for quick conversion to - # integer in the case that we've already computed enough - # digits; the stored digits should always be correct - # (truncated, not rounded to nearest). - if p < 0: - raise ValueError("p should be nonnegative") - - if p >= len(self.digits): - # compute p+3, p+6, p+9, ... 
digits; continue until at - # least one of the extra digits is nonzero - extra = 3 - while True: - # compute p+extra digits, correct to within 1ulp - M = 10**(p+extra+2) - digits = str(_div_nearest(_ilog(10*M, M), 100)) - if digits[-extra:] != '0'*extra: - break - extra += 3 - # keep all reliable digits so far; remove trailing zeros - # and next nonzero digit - self.digits = digits.rstrip('0')[:-1] - return int(self.digits[:p+1]) - -_log10_digits = _Log10Memoize().getdigits - -def _iexp(x, M, L=8): - """Given integers x and M, M > 0, such that x/M is small in absolute - value, compute an integer approximation to M*exp(x/M). For 0 <= - x/M <= 2.4, the absolute error in the result is bounded by 60 (and - is usually much smaller).""" - - # Algorithm: to compute exp(z) for a real number z, first divide z - # by a suitable power R of 2 so that |z/2**R| < 2**-L. Then - # compute expm1(z/2**R) = exp(z/2**R) - 1 using the usual Taylor - # series - # - # expm1(x) = x + x**2/2! + x**3/3! + ... - # - # Now use the identity - # - # expm1(2x) = expm1(x)*(expm1(x)+2) - # - # R times to compute the sequence expm1(z/2**R), - # expm1(z/2**(R-1)), ... , exp(z/2), exp(z). - - # Find R such that x/2**R/M <= 2**-L - R = _nbits((long(x)< M - T = -int(-10*len(str(M))//(3*L)) - y = _div_nearest(x, T) - Mshift = long(M)<= 0: - cshift = c*10**shift - else: - cshift = c//10**-shift - quot, rem = divmod(cshift, _log10_digits(q)) - - # reduce remainder back to original precision - rem = _div_nearest(rem, 10**extra) - - # error in result of _iexp < 120; error after division < 0.62 - return _div_nearest(_iexp(rem, 10**p), 1000), quot - p + 3 - -def _dpower(xc, xe, yc, ye, p): - """Given integers xc, xe, yc and ye representing Decimals x = xc*10**xe and - y = yc*10**ye, compute x**y. 
Returns a pair of integers (c, e) such that: - - 10**(p-1) <= c <= 10**p, and - (c-1)*10**e < x**y < (c+1)*10**e - - in other words, c*10**e is an approximation to x**y with p digits - of precision, and with an error in c of at most 1. (This is - almost, but not quite, the same as the error being < 1ulp: when c - == 10**(p-1) we can only guarantee error < 10ulp.) - - We assume that: x is positive and not equal to 1, and y is nonzero. - """ - - # Find b such that 10**(b-1) <= |y| <= 10**b - b = len(str(abs(yc))) + ye - - # log(x) = lxc*10**(-p-b-1), to p+b+1 places after the decimal point - lxc = _dlog(xc, xe, p+b+1) - - # compute product y*log(x) = yc*lxc*10**(-p-b-1+ye) = pc*10**(-p-1) - shift = ye-b - if shift >= 0: - pc = lxc*yc*10**shift - else: - pc = _div_nearest(lxc*yc, 10**-shift) - - if pc == 0: - # we prefer a result that isn't exactly 1; this makes it - # easier to compute a correctly rounded result in __pow__ - if ((len(str(xc)) + xe >= 1) == (yc > 0)): # if x**y > 1: - coeff, exp = 10**(p-1)+1, 1-p - else: - coeff, exp = 10**p-1, -p - else: - coeff, exp = _dexp(pc, -(p+1), p+1) - coeff = _div_nearest(coeff, 10) - exp += 1 - - return coeff, exp - -def _log10_lb(c, correction = { - '1': 100, '2': 70, '3': 53, '4': 40, '5': 31, - '6': 23, '7': 16, '8': 10, '9': 5}): - """Compute a lower bound for 100*log10(c) for a positive integer c.""" - if c <= 0: - raise ValueError("The argument to _log10_lb should be nonnegative.") - str_c = str(c) - return 100*len(str_c) - correction[str_c[0]] - -##### Helper Functions #################################################### - -def _convert_other(other, raiseit=False, allow_float=False): - """Convert other to Decimal. - - Verifies that it's ok to use in an implicit construction. - If allow_float is true, allow conversion from float; this - is used in the comparison methods (__eq__ and friends). 
- - """ - if isinstance(other, Decimal): - return other - if isinstance(other, (int, long)): - return Decimal(other) - if allow_float and isinstance(other, float): - return Decimal.from_float(other) - - if raiseit: - raise TypeError("Unable to convert %s to Decimal" % other) - return NotImplemented - -##### Setup Specific Contexts ############################################ - -# The default context prototype used by Context() -# Is mutable, so that new contexts can have different default values - -DefaultContext = Context( - prec=28, rounding=ROUND_HALF_EVEN, - traps=[DivisionByZero, Overflow, InvalidOperation], - flags=[], - Emax=999999999, - Emin=-999999999, - capitals=1 -) - -# Pre-made alternate contexts offered by the specification -# Don't change these; the user should be able to select these -# contexts and be able to reproduce results from other implementations -# of the spec. - -BasicContext = Context( - prec=9, rounding=ROUND_HALF_UP, - traps=[DivisionByZero, Overflow, InvalidOperation, Clamped, Underflow], - flags=[], -) - -ExtendedContext = Context( - prec=9, rounding=ROUND_HALF_EVEN, - traps=[], - flags=[], -) - - -##### crud for parsing strings ############################################# -# -# Regular expression used for parsing numeric strings. Additional -# comments: -# -# 1. Uncomment the two '\s*' lines to allow leading and/or trailing -# whitespace. But note that the specification disallows whitespace in -# a numeric string. -# -# 2. For finite numbers (not infinities and NaNs) the body of the -# number between the optional sign and the optional exponent must have -# at least one decimal digit, possibly after the decimal point. The -# lookahead expression '(?=\d|\.\d)' checks this. - -import re -_parser = re.compile(r""" # A numeric string consists of: -# \s* - (?P[-+])? # an optional sign, followed by either... - ( - (?=\d|\.\d) # ...a number (with at least one digit) - (?P\d*) # having a (possibly empty) integer part - (\.(?P\d*))? 
# followed by an optional fractional part - (E(?P[-+]?\d+))? # followed by an optional exponent, or... - | - Inf(inity)? # ...an infinity, or... - | - (?Ps)? # ...an (optionally signaling) - NaN # NaN - (?P\d*) # with (possibly empty) diagnostic info. - ) -# \s* - \Z -""", re.VERBOSE | re.IGNORECASE | re.UNICODE).match - -_all_zeros = re.compile('0*$').match -_exact_half = re.compile('50*$').match - -##### PEP3101 support functions ############################################## -# The functions in this section have little to do with the Decimal -# class, and could potentially be reused or adapted for other pure -# Python numeric classes that want to implement __format__ -# -# A format specifier for Decimal looks like: -# -# [[fill]align][sign][0][minimumwidth][,][.precision][type] - -_parse_format_specifier_regex = re.compile(r"""\A -(?: - (?P.)? - (?P[<>=^]) -)? -(?P[-+ ])? -(?P0)? -(?P(?!0)\d+)? -(?P,)? -(?:\.(?P0|(?!0)\d+))? -(?P[eEfFgGn%])? -\Z -""", re.VERBOSE) - -del re - -# The locale module is only needed for the 'n' format specifier. The -# rest of the PEP 3101 code functions quite happily without it, so we -# don't care too much if locale isn't present. -try: - import locale as _locale -except ImportError: - pass - -def _parse_format_specifier(format_spec, _localeconv=None): - """Parse and validate a format specifier. 
- - Turns a standard numeric format specifier into a dict, with the - following entries: - - fill: fill character to pad field to minimum width - align: alignment type, either '<', '>', '=' or '^' - sign: either '+', '-' or ' ' - minimumwidth: nonnegative integer giving minimum width - zeropad: boolean, indicating whether to pad with zeros - thousands_sep: string to use as thousands separator, or '' - grouping: grouping for thousands separators, in format - used by localeconv - decimal_point: string to use for decimal point - precision: nonnegative integer giving precision, or None - type: one of the characters 'eEfFgG%', or None - unicode: boolean (always True for Python 3.x) - - """ - m = _parse_format_specifier_regex.match(format_spec) - if m is None: - raise ValueError("Invalid format specifier: " + format_spec) - - # get the dictionary - format_dict = m.groupdict() - - # zeropad; defaults for fill and alignment. If zero padding - # is requested, the fill and align fields should be absent. - fill = format_dict['fill'] - align = format_dict['align'] - format_dict['zeropad'] = (format_dict['zeropad'] is not None) - if format_dict['zeropad']: - if fill is not None: - raise ValueError("Fill character conflicts with '0'" - " in format specifier: " + format_spec) - if align is not None: - raise ValueError("Alignment conflicts with '0' in " - "format specifier: " + format_spec) - format_dict['fill'] = fill or ' ' - # PEP 3101 originally specified that the default alignment should - # be left; it was later agreed that right-aligned makes more sense - # for numeric types. See http://bugs.python.org/issue6857. 
- format_dict['align'] = align or '>' - - # default sign handling: '-' for negative, '' for positive - if format_dict['sign'] is None: - format_dict['sign'] = '-' - - # minimumwidth defaults to 0; precision remains None if not given - format_dict['minimumwidth'] = int(format_dict['minimumwidth'] or '0') - if format_dict['precision'] is not None: - format_dict['precision'] = int(format_dict['precision']) - - # if format type is 'g' or 'G' then a precision of 0 makes little - # sense; convert it to 1. Same if format type is unspecified. - if format_dict['precision'] == 0: - if format_dict['type'] is None or format_dict['type'] in 'gG': - format_dict['precision'] = 1 - - # determine thousands separator, grouping, and decimal separator, and - # add appropriate entries to format_dict - if format_dict['type'] == 'n': - # apart from separators, 'n' behaves just like 'g' - format_dict['type'] = 'g' - if _localeconv is None: - _localeconv = _locale.localeconv() - if format_dict['thousands_sep'] is not None: - raise ValueError("Explicit thousands separator conflicts with " - "'n' type in format specifier: " + format_spec) - format_dict['thousands_sep'] = _localeconv['thousands_sep'] - format_dict['grouping'] = _localeconv['grouping'] - format_dict['decimal_point'] = _localeconv['decimal_point'] - else: - if format_dict['thousands_sep'] is None: - format_dict['thousands_sep'] = '' - format_dict['grouping'] = [3, 0] - format_dict['decimal_point'] = '.' - - # record whether return type should be str or unicode - try: - format_dict['unicode'] = isinstance(format_spec, unicode) - except NameError: - format_dict['unicode'] = False - - return format_dict - -def _format_align(sign, body, spec): - """Given an unpadded, non-aligned numeric string 'body' and sign - string 'sign', add padding and alignment conforming to the given - format specifier dictionary 'spec' (as produced by - parse_format_specifier). - - Also converts result to unicode if necessary. 
- - """ - # how much extra space do we have to play with? - minimumwidth = spec['minimumwidth'] - fill = spec['fill'] - padding = fill*(minimumwidth - len(sign) - len(body)) - - align = spec['align'] - if align == '<': - result = sign + body + padding - elif align == '>': - result = padding + sign + body - elif align == '=': - result = sign + padding + body - elif align == '^': - half = len(padding)//2 - result = padding[:half] + sign + body + padding[half:] - else: - raise ValueError('Unrecognised alignment field') - - # make sure that result is unicode if necessary - if spec['unicode']: - result = unicode(result) - - return result - -def _group_lengths(grouping): - """Convert a localeconv-style grouping into a (possibly infinite) - iterable of integers representing group lengths. - - """ - # The result from localeconv()['grouping'], and the input to this - # function, should be a list of integers in one of the - # following three forms: - # - # (1) an empty list, or - # (2) nonempty list of positive integers + [0] - # (3) list of positive integers + [locale.CHAR_MAX], or - - from itertools import chain, repeat - if not grouping: - return [] - elif grouping[-1] == 0 and len(grouping) >= 2: - return chain(grouping[:-1], repeat(grouping[-2])) - elif grouping[-1] == _locale.CHAR_MAX: - return grouping[:-1] - else: - raise ValueError('unrecognised format for grouping') - -def _insert_thousands_sep(digits, spec, min_width=1): - """Insert thousands separators into a digit string. - - spec is a dictionary whose keys should include 'thousands_sep' and - 'grouping'; typically it's the result of parsing the format - specifier using _parse_format_specifier. - - The min_width keyword argument gives the minimum length of the - result, which will be padded on the left with zeros if necessary. - - If necessary, the zero padding adds an extra '0' on the left to - avoid a leading thousands separator. 
For example, inserting - commas every three digits in '123456', with min_width=8, gives - '0,123,456', even though that has length 9. - - """ - - sep = spec['thousands_sep'] - grouping = spec['grouping'] - - groups = [] - for l in _group_lengths(grouping): - if l <= 0: - raise ValueError("group length should be positive") - # max(..., 1) forces at least 1 digit to the left of a separator - l = min(max(len(digits), min_width, 1), l) - groups.append('0'*(l - len(digits)) + digits[-l:]) - digits = digits[:-l] - min_width -= l - if not digits and min_width <= 0: - break - min_width -= len(sep) - else: - l = max(len(digits), min_width, 1) - groups.append('0'*(l - len(digits)) + digits[-l:]) - return sep.join(reversed(groups)) - -def _format_sign(is_negative, spec): - """Determine sign character.""" - - if is_negative: - return '-' - elif spec['sign'] in ' +': - return spec['sign'] - else: - return '' - -def _format_number(is_negative, intpart, fracpart, exp, spec): - """Format a number, given the following data: - - is_negative: true if the number is negative, else false - intpart: string of digits that must appear before the decimal point - fracpart: string of digits that must come after the point - exp: exponent, as an integer - spec: dictionary resulting from parsing the format specifier - - This function uses the information in spec to: - insert separators (decimal separator and thousands separators) - format the sign - format the exponent - add trailing '%' for the '%' type - zero-pad if necessary - fill and align if necessary - """ - - sign = _format_sign(is_negative, spec) - - if fracpart: - fracpart = spec['decimal_point'] + fracpart - - if exp != 0 or spec['type'] in 'eE': - echar = {'E': 'E', 'e': 'e', 'G': 'E', 'g': 'e'}[spec['type']] - fracpart += "{0}{1:+}".format(echar, exp) - if spec['type'] == '%': - fracpart += '%' - - if spec['zeropad']: - min_width = spec['minimumwidth'] - len(fracpart) - len(sign) - else: - min_width = 0 - intpart = 
_insert_thousands_sep(intpart, spec, min_width) - - return _format_align(sign, intpart+fracpart, spec) - - -##### Useful Constants (internal use only) ################################ - -# Reusable defaults -_Infinity = Decimal('Inf') -_NegativeInfinity = Decimal('-Inf') -_NaN = Decimal('NaN') -_Zero = Decimal(0) -_One = Decimal(1) -_NegativeOne = Decimal(-1) - -# _SignedInfinity[sign] is infinity w/ that sign -_SignedInfinity = (_Infinity, _NegativeInfinity) - - - -if __name__ == '__main__': - import doctest, sys - doctest.testmod(sys.modules[__name__]) diff --git a/python/Lib/difflib.py b/python/Lib/difflib.py deleted file mode 100755 index 1c6fbdbedc..0000000000 --- a/python/Lib/difflib.py +++ /dev/null @@ -1,2057 +0,0 @@ -""" -Module difflib -- helpers for computing deltas between objects. - -Function get_close_matches(word, possibilities, n=3, cutoff=0.6): - Use SequenceMatcher to return list of the best "good enough" matches. - -Function context_diff(a, b): - For two lists of strings, return a delta in context diff format. - -Function ndiff(a, b): - Return a delta: the difference between `a` and `b` (lists of strings). - -Function restore(delta, which): - Return one of the two sequences that generated an ndiff delta. - -Function unified_diff(a, b): - For two lists of strings, return a delta in unified diff format. - -Class SequenceMatcher: - A flexible class for comparing pairs of sequences of any type. - -Class Differ: - For producing human-readable deltas from sequences of lines of text. - -Class HtmlDiff: - For producing HTML side by side comparison with change highlights. 
-""" - -__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher', - 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff', - 'unified_diff', 'HtmlDiff', 'Match'] - -import heapq -from collections import namedtuple as _namedtuple -from functools import reduce - -Match = _namedtuple('Match', 'a b size') - -def _calculate_ratio(matches, length): - if length: - return 2.0 * matches / length - return 1.0 - -class SequenceMatcher: - - """ - SequenceMatcher is a flexible class for comparing pairs of sequences of - any type, so long as the sequence elements are hashable. The basic - algorithm predates, and is a little fancier than, an algorithm - published in the late 1980's by Ratcliff and Obershelp under the - hyperbolic name "gestalt pattern matching". The basic idea is to find - the longest contiguous matching subsequence that contains no "junk" - elements (R-O doesn't address junk). The same idea is then applied - recursively to the pieces of the sequences to the left and to the right - of the matching subsequence. This does not yield minimal edit - sequences, but does tend to yield matches that "look right" to people. - - SequenceMatcher tries to compute a "human-friendly diff" between two - sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the - longest *contiguous* & junk-free matching subsequence. That's what - catches peoples' eyes. The Windows(tm) windiff has another interesting - notion, pairing up elements that appear uniquely in each sequence. - That, and the method here, appear to yield more intuitive difference - reports than does diff. This method appears to be the least vulnerable - to synching up on blocks of "junk lines", though (like blank lines in - ordinary text files, or maybe "

" lines in HTML files). That may be - because this is the only method of the 3 that has a *concept* of - "junk" . - - Example, comparing two strings, and considering blanks to be "junk": - - >>> s = SequenceMatcher(lambda x: x == " ", - ... "private Thread currentThread;", - ... "private volatile Thread currentThread;") - >>> - - .ratio() returns a float in [0, 1], measuring the "similarity" of the - sequences. As a rule of thumb, a .ratio() value over 0.6 means the - sequences are close matches: - - >>> print round(s.ratio(), 3) - 0.866 - >>> - - If you're only interested in where the sequences match, - .get_matching_blocks() is handy: - - >>> for block in s.get_matching_blocks(): - ... print "a[%d] and b[%d] match for %d elements" % block - a[0] and b[0] match for 8 elements - a[8] and b[17] match for 21 elements - a[29] and b[38] match for 0 elements - - Note that the last tuple returned by .get_matching_blocks() is always a - dummy, (len(a), len(b), 0), and this is the only case in which the last - tuple element (number of elements matched) is 0. - - If you want to know how to change the first sequence into the second, - use .get_opcodes(): - - >>> for opcode in s.get_opcodes(): - ... print "%6s a[%d:%d] b[%d:%d]" % opcode - equal a[0:8] b[0:8] - insert a[8:8] b[8:17] - equal a[8:29] b[17:38] - - See the Differ class for a fancy human-friendly file differencer, which - uses SequenceMatcher both to compare sequences of lines, and to compare - sequences of characters within similar (near-matching) lines. - - See also function get_close_matches() in this module, which shows how - simple code building on SequenceMatcher can be used to do useful work. - - Timing: Basic R-O is cubic time worst case and quadratic time expected - case. SequenceMatcher is quadratic time for the worst case and has - expected-case behavior dependent in a complicated way on how many - elements the sequences have in common; best case time is linear. 
- - Methods: - - __init__(isjunk=None, a='', b='') - Construct a SequenceMatcher. - - set_seqs(a, b) - Set the two sequences to be compared. - - set_seq1(a) - Set the first sequence to be compared. - - set_seq2(b) - Set the second sequence to be compared. - - find_longest_match(alo, ahi, blo, bhi) - Find longest matching block in a[alo:ahi] and b[blo:bhi]. - - get_matching_blocks() - Return list of triples describing matching subsequences. - - get_opcodes() - Return list of 5-tuples describing how to turn a into b. - - ratio() - Return a measure of the sequences' similarity (float in [0,1]). - - quick_ratio() - Return an upper bound on .ratio() relatively quickly. - - real_quick_ratio() - Return an upper bound on ratio() very quickly. - """ - - def __init__(self, isjunk=None, a='', b='', autojunk=True): - """Construct a SequenceMatcher. - - Optional arg isjunk is None (the default), or a one-argument - function that takes a sequence element and returns true iff the - element is junk. None is equivalent to passing "lambda x: 0", i.e. - no elements are considered to be junk. For example, pass - lambda x: x in " \\t" - if you're comparing lines as sequences of characters, and don't - want to synch up on blanks or hard tabs. - - Optional arg a is the first of two sequences to be compared. By - default, an empty string. The elements of a must be hashable. See - also .set_seqs() and .set_seq1(). - - Optional arg b is the second of two sequences to be compared. By - default, an empty string. The elements of b must be hashable. See - also .set_seqs() and .set_seq2(). - - Optional arg autojunk should be set to False to disable the - "automatic junk heuristic" that treats popular elements as junk - (see module documentation for more information). - """ - - # Members: - # a - # first sequence - # b - # second sequence; differences are computed as "what do - # we need to do to 'a' to change it into 'b'?" 
- # b2j - # for x in b, b2j[x] is a list of the indices (into b) - # at which x appears; junk elements do not appear - # fullbcount - # for x in b, fullbcount[x] == the number of times x - # appears in b; only materialized if really needed (used - # only for computing quick_ratio()) - # matching_blocks - # a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k]; - # ascending & non-overlapping in i and in j; terminated by - # a dummy (len(a), len(b), 0) sentinel - # opcodes - # a list of (tag, i1, i2, j1, j2) tuples, where tag is - # one of - # 'replace' a[i1:i2] should be replaced by b[j1:j2] - # 'delete' a[i1:i2] should be deleted - # 'insert' b[j1:j2] should be inserted - # 'equal' a[i1:i2] == b[j1:j2] - # isjunk - # a user-supplied function taking a sequence element and - # returning true iff the element is "junk" -- this has - # subtle but helpful effects on the algorithm, which I'll - # get around to writing up someday <0.9 wink>. - # DON'T USE! Only __chain_b uses this. Use isbjunk. - # isbjunk - # for x in b, isbjunk(x) == isjunk(x) but much faster; - # it's really the __contains__ method of a hidden dict. - # DOES NOT WORK for x in a! - # isbpopular - # for x in b, isbpopular(x) is true iff b is reasonably long - # (at least 200 elements) and x accounts for more than 1 + 1% of - # its elements (when autojunk is enabled). - # DOES NOT WORK for x in a! - - self.isjunk = isjunk - self.a = self.b = None - self.autojunk = autojunk - self.set_seqs(a, b) - - def set_seqs(self, a, b): - """Set the two sequences to be compared. - - >>> s = SequenceMatcher() - >>> s.set_seqs("abcd", "bcde") - >>> s.ratio() - 0.75 - """ - - self.set_seq1(a) - self.set_seq2(b) - - def set_seq1(self, a): - """Set the first sequence to be compared. - - The second sequence to be compared is not changed. 
- - >>> s = SequenceMatcher(None, "abcd", "bcde") - >>> s.ratio() - 0.75 - >>> s.set_seq1("bcde") - >>> s.ratio() - 1.0 - >>> - - SequenceMatcher computes and caches detailed information about the - second sequence, so if you want to compare one sequence S against - many sequences, use .set_seq2(S) once and call .set_seq1(x) - repeatedly for each of the other sequences. - - See also set_seqs() and set_seq2(). - """ - - if a is self.a: - return - self.a = a - self.matching_blocks = self.opcodes = None - - def set_seq2(self, b): - """Set the second sequence to be compared. - - The first sequence to be compared is not changed. - - >>> s = SequenceMatcher(None, "abcd", "bcde") - >>> s.ratio() - 0.75 - >>> s.set_seq2("abcd") - >>> s.ratio() - 1.0 - >>> - - SequenceMatcher computes and caches detailed information about the - second sequence, so if you want to compare one sequence S against - many sequences, use .set_seq2(S) once and call .set_seq1(x) - repeatedly for each of the other sequences. - - See also set_seqs() and set_seq1(). - """ - - if b is self.b: - return - self.b = b - self.matching_blocks = self.opcodes = None - self.fullbcount = None - self.__chain_b() - - # For each element x in b, set b2j[x] to a list of the indices in - # b where x appears; the indices are in increasing order; note that - # the number of times x appears in b is len(b2j[x]) ... - # when self.isjunk is defined, junk elements don't show up in this - # map at all, which stops the central find_longest_match method - # from starting any matching block at a junk element ... - # also creates the fast isbjunk function ... 
- # b2j also does not contain entries for "popular" elements, meaning - # elements that account for more than 1 + 1% of the total elements, and - # when the sequence is reasonably large (>= 200 elements); this can - # be viewed as an adaptive notion of semi-junk, and yields an enormous - # speedup when, e.g., comparing program files with hundreds of - # instances of "return NULL;" ... - # note that this is only called when b changes; so for cross-product - # kinds of matches, it's best to call set_seq2 once, then set_seq1 - # repeatedly - - def __chain_b(self): - # Because isjunk is a user-defined (not C) function, and we test - # for junk a LOT, it's important to minimize the number of calls. - # Before the tricks described here, __chain_b was by far the most - # time-consuming routine in the whole module! If anyone sees - # Jim Roskind, thank him again for profile.py -- I never would - # have guessed that. - # The first trick is to build b2j ignoring the possibility - # of junk. I.e., we don't call isjunk at all yet. Throwing - # out the junk later is much cheaper than building b2j "right" - # from the start. - b = self.b - self.b2j = b2j = {} - - for i, elt in enumerate(b): - indices = b2j.setdefault(elt, []) - indices.append(i) - - # Purge junk elements - junk = set() - isjunk = self.isjunk - if isjunk: - for elt in list(b2j.keys()): # using list() since b2j is modified - if isjunk(elt): - junk.add(elt) - del b2j[elt] - - # Purge popular elements that are not junk - popular = set() - n = len(b) - if self.autojunk and n >= 200: - ntest = n // 100 + 1 - for elt, idxs in list(b2j.items()): - if len(idxs) > ntest: - popular.add(elt) - del b2j[elt] - - # Now for x in b, isjunk(x) == x in junk, but the latter is much faster. - # Sicne the number of *unique* junk elements is probably small, the - # memory burden of keeping this set alive is likely trivial compared to - # the size of b2j. 
- self.isbjunk = junk.__contains__ - self.isbpopular = popular.__contains__ - - def find_longest_match(self, alo, ahi, blo, bhi): - """Find longest matching block in a[alo:ahi] and b[blo:bhi]. - - If isjunk is not defined: - - Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where - alo <= i <= i+k <= ahi - blo <= j <= j+k <= bhi - and for all (i',j',k') meeting those conditions, - k >= k' - i <= i' - and if i == i', j <= j' - - In other words, of all maximal matching blocks, return one that - starts earliest in a, and of all those maximal matching blocks that - start earliest in a, return the one that starts earliest in b. - - >>> s = SequenceMatcher(None, " abcd", "abcd abcd") - >>> s.find_longest_match(0, 5, 0, 9) - Match(a=0, b=4, size=5) - - If isjunk is defined, first the longest matching block is - determined as above, but with the additional restriction that no - junk element appears in the block. Then that block is extended as - far as possible by matching (only) junk elements on both sides. So - the resulting block never matches on junk except as identical junk - happens to be adjacent to an "interesting" match. - - Here's the same example as before, but considering blanks to be - junk. That prevents " abcd" from matching the " abcd" at the tail - end of the second sequence directly. Instead only the "abcd" can - match, and matches the leftmost "abcd" in the second sequence: - - >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd") - >>> s.find_longest_match(0, 5, 0, 9) - Match(a=1, b=0, size=4) - - If no blocks match, return (alo, blo, 0). - - >>> s = SequenceMatcher(None, "ab", "c") - >>> s.find_longest_match(0, 2, 0, 1) - Match(a=0, b=0, size=0) - """ - - # CAUTION: stripping common prefix or suffix would be incorrect. - # E.g., - # ab - # acab - # Longest matching block is "ab", but if common prefix is - # stripped, it's "a" (tied with "b"). 
UNIX(tm) diff does so - # strip, so ends up claiming that ab is changed to acab by - # inserting "ca" in the middle. That's minimal but unintuitive: - # "it's obvious" that someone inserted "ac" at the front. - # Windiff ends up at the same place as diff, but by pairing up - # the unique 'b's and then matching the first two 'a's. - - a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk - besti, bestj, bestsize = alo, blo, 0 - # find longest junk-free match - # during an iteration of the loop, j2len[j] = length of longest - # junk-free match ending with a[i-1] and b[j] - j2len = {} - nothing = [] - for i in xrange(alo, ahi): - # look at all instances of a[i] in b; note that because - # b2j has no junk keys, the loop is skipped if a[i] is junk - j2lenget = j2len.get - newj2len = {} - for j in b2j.get(a[i], nothing): - # a[i] matches b[j] - if j < blo: - continue - if j >= bhi: - break - k = newj2len[j] = j2lenget(j-1, 0) + 1 - if k > bestsize: - besti, bestj, bestsize = i-k+1, j-k+1, k - j2len = newj2len - - # Extend the best by non-junk elements on each end. In particular, - # "popular" non-junk elements aren't in b2j, which greatly speeds - # the inner loop above, but also means "the best" match so far - # doesn't contain any junk *or* popular non-junk elements. - while besti > alo and bestj > blo and \ - not isbjunk(b[bestj-1]) and \ - a[besti-1] == b[bestj-1]: - besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 - while besti+bestsize < ahi and bestj+bestsize < bhi and \ - not isbjunk(b[bestj+bestsize]) and \ - a[besti+bestsize] == b[bestj+bestsize]: - bestsize += 1 - - # Now that we have a wholly interesting match (albeit possibly - # empty!), we may as well suck up the matching junk on each - # side of it too. Can't think of a good reason not to, and it - # saves post-processing the (possibly considerable) expense of - # figuring out what to do with it. 
In the case of an empty - # interesting match, this is clearly the right thing to do, - # because no other kind of match is possible in the regions. - while besti > alo and bestj > blo and \ - isbjunk(b[bestj-1]) and \ - a[besti-1] == b[bestj-1]: - besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 - while besti+bestsize < ahi and bestj+bestsize < bhi and \ - isbjunk(b[bestj+bestsize]) and \ - a[besti+bestsize] == b[bestj+bestsize]: - bestsize = bestsize + 1 - - return Match(besti, bestj, bestsize) - - def get_matching_blocks(self): - """Return list of triples describing matching subsequences. - - Each triple is of the form (i, j, n), and means that - a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in - i and in j. New in Python 2.5, it's also guaranteed that if - (i, j, n) and (i', j', n') are adjacent triples in the list, and - the second is not the last triple in the list, then i+n != i' or - j+n != j'. IOW, adjacent triples never describe adjacent equal - blocks. - - The last triple is a dummy, (len(a), len(b), 0), and is the only - triple with n==0. - - >>> s = SequenceMatcher(None, "abxcd", "abcd") - >>> s.get_matching_blocks() - [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)] - """ - - if self.matching_blocks is not None: - return self.matching_blocks - la, lb = len(self.a), len(self.b) - - # This is most naturally expressed as a recursive algorithm, but - # at least one user bumped into extreme use cases that exceeded - # the recursion limit on their box. So, now we maintain a list - # ('queue`) of blocks we still need to look at, and append partial - # results to `matching_blocks` in a loop; the matches are sorted - # at the end. 
- queue = [(0, la, 0, lb)] - matching_blocks = [] - while queue: - alo, ahi, blo, bhi = queue.pop() - i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi) - # a[alo:i] vs b[blo:j] unknown - # a[i:i+k] same as b[j:j+k] - # a[i+k:ahi] vs b[j+k:bhi] unknown - if k: # if k is 0, there was no matching block - matching_blocks.append(x) - if alo < i and blo < j: - queue.append((alo, i, blo, j)) - if i+k < ahi and j+k < bhi: - queue.append((i+k, ahi, j+k, bhi)) - matching_blocks.sort() - - # It's possible that we have adjacent equal blocks in the - # matching_blocks list now. Starting with 2.5, this code was added - # to collapse them. - i1 = j1 = k1 = 0 - non_adjacent = [] - for i2, j2, k2 in matching_blocks: - # Is this block adjacent to i1, j1, k1? - if i1 + k1 == i2 and j1 + k1 == j2: - # Yes, so collapse them -- this just increases the length of - # the first block by the length of the second, and the first - # block so lengthened remains the block to compare against. - k1 += k2 - else: - # Not adjacent. Remember the first block (k1==0 means it's - # the dummy we started with), and make the second block the - # new block to compare against. - if k1: - non_adjacent.append((i1, j1, k1)) - i1, j1, k1 = i2, j2, k2 - if k1: - non_adjacent.append((i1, j1, k1)) - - non_adjacent.append( (la, lb, 0) ) - self.matching_blocks = map(Match._make, non_adjacent) - return self.matching_blocks - - def get_opcodes(self): - """Return list of 5-tuples describing how to turn a into b. - - Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple - has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the - tuple preceding it, and likewise for j1 == the previous j2. - - The tags are strings, with these meanings: - - 'replace': a[i1:i2] should be replaced by b[j1:j2] - 'delete': a[i1:i2] should be deleted. - Note that j1==j2 in this case. - 'insert': b[j1:j2] should be inserted at a[i1:i1]. - Note that i1==i2 in this case. 
- 'equal': a[i1:i2] == b[j1:j2] - - >>> a = "qabxcd" - >>> b = "abycdf" - >>> s = SequenceMatcher(None, a, b) - >>> for tag, i1, i2, j1, j2 in s.get_opcodes(): - ... print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" % - ... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])) - delete a[0:1] (q) b[0:0] () - equal a[1:3] (ab) b[0:2] (ab) - replace a[3:4] (x) b[2:3] (y) - equal a[4:6] (cd) b[3:5] (cd) - insert a[6:6] () b[5:6] (f) - """ - - if self.opcodes is not None: - return self.opcodes - i = j = 0 - self.opcodes = answer = [] - for ai, bj, size in self.get_matching_blocks(): - # invariant: we've pumped out correct diffs to change - # a[:i] into b[:j], and the next matching block is - # a[ai:ai+size] == b[bj:bj+size]. So we need to pump - # out a diff to change a[i:ai] into b[j:bj], pump out - # the matching block, and move (i,j) beyond the match - tag = '' - if i < ai and j < bj: - tag = 'replace' - elif i < ai: - tag = 'delete' - elif j < bj: - tag = 'insert' - if tag: - answer.append( (tag, i, ai, j, bj) ) - i, j = ai+size, bj+size - # the list of matching blocks is terminated by a - # sentinel with size 0 - if size: - answer.append( ('equal', ai, i, bj, j) ) - return answer - - def get_grouped_opcodes(self, n=3): - """ Isolate change clusters by eliminating ranges with no changes. - - Return a generator of groups with up to n lines of context. - Each group is in the same format as returned by get_opcodes(). 
- - >>> from pprint import pprint - >>> a = map(str, range(1,40)) - >>> b = a[:] - >>> b[8:8] = ['i'] # Make an insertion - >>> b[20] += 'x' # Make a replacement - >>> b[23:28] = [] # Make a deletion - >>> b[30] += 'y' # Make another replacement - >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes())) - [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)], - [('equal', 16, 19, 17, 20), - ('replace', 19, 20, 20, 21), - ('equal', 20, 22, 21, 23), - ('delete', 22, 27, 23, 23), - ('equal', 27, 30, 23, 26)], - [('equal', 31, 34, 27, 30), - ('replace', 34, 35, 30, 31), - ('equal', 35, 38, 31, 34)]] - """ - - codes = self.get_opcodes() - if not codes: - codes = [("equal", 0, 1, 0, 1)] - # Fixup leading and trailing groups if they show no changes. - if codes[0][0] == 'equal': - tag, i1, i2, j1, j2 = codes[0] - codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2 - if codes[-1][0] == 'equal': - tag, i1, i2, j1, j2 = codes[-1] - codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n) - - nn = n + n - group = [] - for tag, i1, i2, j1, j2 in codes: - # End the current group and start a new one whenever - # there is a large range with no changes. - if tag == 'equal' and i2-i1 > nn: - group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n))) - yield group - group = [] - i1, j1 = max(i1, i2-n), max(j1, j2-n) - group.append((tag, i1, i2, j1 ,j2)) - if group and not (len(group)==1 and group[0][0] == 'equal'): - yield group - - def ratio(self): - """Return a measure of the sequences' similarity (float in [0,1]). - - Where T is the total number of elements in both sequences, and - M is the number of matches, this is 2.0*M / T. - Note that this is 1 if the sequences are identical, and 0 if - they have nothing in common. - - .ratio() is expensive to compute if you haven't already computed - .get_matching_blocks() or .get_opcodes(), in which case you may - want to try .quick_ratio() or .real_quick_ratio() first to get an - upper bound. 
- - >>> s = SequenceMatcher(None, "abcd", "bcde") - >>> s.ratio() - 0.75 - >>> s.quick_ratio() - 0.75 - >>> s.real_quick_ratio() - 1.0 - """ - - matches = reduce(lambda sum, triple: sum + triple[-1], - self.get_matching_blocks(), 0) - return _calculate_ratio(matches, len(self.a) + len(self.b)) - - def quick_ratio(self): - """Return an upper bound on ratio() relatively quickly. - - This isn't defined beyond that it is an upper bound on .ratio(), and - is faster to compute. - """ - - # viewing a and b as multisets, set matches to the cardinality - # of their intersection; this counts the number of matches - # without regard to order, so is clearly an upper bound - if self.fullbcount is None: - self.fullbcount = fullbcount = {} - for elt in self.b: - fullbcount[elt] = fullbcount.get(elt, 0) + 1 - fullbcount = self.fullbcount - # avail[x] is the number of times x appears in 'b' less the - # number of times we've seen it in 'a' so far ... kinda - avail = {} - availhas, matches = avail.__contains__, 0 - for elt in self.a: - if availhas(elt): - numb = avail[elt] - else: - numb = fullbcount.get(elt, 0) - avail[elt] = numb - 1 - if numb > 0: - matches = matches + 1 - return _calculate_ratio(matches, len(self.a) + len(self.b)) - - def real_quick_ratio(self): - """Return an upper bound on ratio() very quickly. - - This isn't defined beyond that it is an upper bound on .ratio(), and - is faster to compute than either .ratio() or .quick_ratio(). - """ - - la, lb = len(self.a), len(self.b) - # can't have more matches than the number of elements in the - # shorter sequence - return _calculate_ratio(min(la, lb), la + lb) - -def get_close_matches(word, possibilities, n=3, cutoff=0.6): - """Use SequenceMatcher to return list of the best "good enough" matches. - - word is a sequence for which close matches are desired (typically a - string). - - possibilities is a list of sequences against which to match word - (typically a list of strings). 
- - Optional arg n (default 3) is the maximum number of close matches to - return. n must be > 0. - - Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities - that don't score at least that similar to word are ignored. - - The best (no more than n) matches among the possibilities are returned - in a list, sorted by similarity score, most similar first. - - >>> get_close_matches("appel", ["ape", "apple", "peach", "puppy"]) - ['apple', 'ape'] - >>> import keyword as _keyword - >>> get_close_matches("wheel", _keyword.kwlist) - ['while'] - >>> get_close_matches("apple", _keyword.kwlist) - [] - >>> get_close_matches("accept", _keyword.kwlist) - ['except'] - """ - - if not n > 0: - raise ValueError("n must be > 0: %r" % (n,)) - if not 0.0 <= cutoff <= 1.0: - raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,)) - result = [] - s = SequenceMatcher() - s.set_seq2(word) - for x in possibilities: - s.set_seq1(x) - if s.real_quick_ratio() >= cutoff and \ - s.quick_ratio() >= cutoff and \ - s.ratio() >= cutoff: - result.append((s.ratio(), x)) - - # Move the best scorers to head of list - result = heapq.nlargest(n, result) - # Strip scores for the best n matches - return [x for score, x in result] - -def _count_leading(line, ch): - """ - Return number of `ch` characters at the start of `line`. - - Example: - - >>> _count_leading(' abc', ' ') - 3 - """ - - i, n = 0, len(line) - while i < n and line[i] == ch: - i += 1 - return i - -class Differ: - r""" - Differ is a class for comparing sequences of lines of text, and - producing human-readable differences or deltas. Differ uses - SequenceMatcher both to compare sequences of lines, and to compare - sequences of characters within similar (near-matching) lines. - - Each line of a Differ delta begins with a two-letter code: - - '- ' line unique to sequence 1 - '+ ' line unique to sequence 2 - ' ' line common to both sequences - '? ' line not present in either input sequence - - Lines beginning with '? 
' attempt to guide the eye to intraline - differences, and were not present in either input sequence. These lines - can be confusing if the sequences contain tab characters. - - Note that Differ makes no claim to produce a *minimal* diff. To the - contrary, minimal diffs are often counter-intuitive, because they synch - up anywhere possible, sometimes accidental matches 100 pages apart. - Restricting synch points to contiguous matches preserves some notion of - locality, at the occasional cost of producing a longer diff. - - Example: Comparing two texts. - - First we set up the texts, sequences of individual single-line strings - ending with newlines (such sequences can also be obtained from the - `readlines()` method of file-like objects): - - >>> text1 = ''' 1. Beautiful is better than ugly. - ... 2. Explicit is better than implicit. - ... 3. Simple is better than complex. - ... 4. Complex is better than complicated. - ... '''.splitlines(1) - >>> len(text1) - 4 - >>> text1[0][-1] - '\n' - >>> text2 = ''' 1. Beautiful is better than ugly. - ... 3. Simple is better than complex. - ... 4. Complicated is better than complex. - ... 5. Flat is better than nested. - ... '''.splitlines(1) - - Next we instantiate a Differ object: - - >>> d = Differ() - - Note that when instantiating a Differ object we may pass functions to - filter out line and character 'junk'. See Differ.__init__ for details. - - Finally, we compare the two: - - >>> result = list(d.compare(text1, text2)) - - 'result' is a list of strings, so let's pretty-print it: - - >>> from pprint import pprint as _pprint - >>> _pprint(result) - [' 1. Beautiful is better than ugly.\n', - '- 2. Explicit is better than implicit.\n', - '- 3. Simple is better than complex.\n', - '+ 3. Simple is better than complex.\n', - '? ++\n', - '- 4. Complex is better than complicated.\n', - '? ^ ---- ^\n', - '+ 4. Complicated is better than complex.\n', - '? ++++ ^ ^\n', - '+ 5. 
Flat is better than nested.\n'] - - As a single multi-line string it looks like this: - - >>> print ''.join(result), - 1. Beautiful is better than ugly. - - 2. Explicit is better than implicit. - - 3. Simple is better than complex. - + 3. Simple is better than complex. - ? ++ - - 4. Complex is better than complicated. - ? ^ ---- ^ - + 4. Complicated is better than complex. - ? ++++ ^ ^ - + 5. Flat is better than nested. - - Methods: - - __init__(linejunk=None, charjunk=None) - Construct a text differencer, with optional filters. - - compare(a, b) - Compare two sequences of lines; generate the resulting delta. - """ - - def __init__(self, linejunk=None, charjunk=None): - """ - Construct a text differencer, with optional filters. - - The two optional keyword parameters are for filter functions: - - - `linejunk`: A function that should accept a single string argument, - and return true iff the string is junk. The module-level function - `IS_LINE_JUNK` may be used to filter out lines without visible - characters, except for at most one splat ('#'). It is recommended - to leave linejunk None; as of Python 2.3, the underlying - SequenceMatcher class has grown an adaptive notion of "noise" lines - that's better than any static definition the author has ever been - able to craft. - - - `charjunk`: A function that should accept a string of length 1. The - module-level function `IS_CHARACTER_JUNK` may be used to filter out - whitespace characters (a blank or tab; **note**: bad idea to include - newline in this!). Use of IS_CHARACTER_JUNK is recommended. - """ - - self.linejunk = linejunk - self.charjunk = charjunk - - def compare(self, a, b): - r""" - Compare two sequences of lines; generate the resulting delta. - - Each sequence must contain individual single-line strings ending with - newlines. Such sequences can be obtained from the `readlines()` method - of file-like objects. 
The delta generated also consists of newline- - terminated strings, ready to be printed as-is via the writeline() - method of a file-like object. - - Example: - - >>> print ''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(1), - ... 'ore\ntree\nemu\n'.splitlines(1))), - - one - ? ^ - + ore - ? ^ - - two - - three - ? - - + tree - + emu - """ - - cruncher = SequenceMatcher(self.linejunk, a, b) - for tag, alo, ahi, blo, bhi in cruncher.get_opcodes(): - if tag == 'replace': - g = self._fancy_replace(a, alo, ahi, b, blo, bhi) - elif tag == 'delete': - g = self._dump('-', a, alo, ahi) - elif tag == 'insert': - g = self._dump('+', b, blo, bhi) - elif tag == 'equal': - g = self._dump(' ', a, alo, ahi) - else: - raise ValueError, 'unknown tag %r' % (tag,) - - for line in g: - yield line - - def _dump(self, tag, x, lo, hi): - """Generate comparison results for a same-tagged range.""" - for i in xrange(lo, hi): - yield '%s %s' % (tag, x[i]) - - def _plain_replace(self, a, alo, ahi, b, blo, bhi): - assert alo < ahi and blo < bhi - # dump the shorter block first -- reduces the burden on short-term - # memory if the blocks are of very different sizes - if bhi - blo < ahi - alo: - first = self._dump('+', b, blo, bhi) - second = self._dump('-', a, alo, ahi) - else: - first = self._dump('-', a, alo, ahi) - second = self._dump('+', b, blo, bhi) - - for g in first, second: - for line in g: - yield line - - def _fancy_replace(self, a, alo, ahi, b, blo, bhi): - r""" - When replacing one block of lines with another, search the blocks - for *similar* lines; the best-matching pair (if any) is used as a - synch point, and intraline difference marking is done on the - similar pair. Lots of work, but often worth it. - - Example: - - >>> d = Differ() - >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1, - ... ['abcdefGhijkl\n'], 0, 1) - >>> print ''.join(results), - - abcDefghiJkl - ? ^ ^ ^ - + abcdefGhijkl - ? 
^ ^ ^ - """ - - # don't synch up unless the lines have a similarity score of at - # least cutoff; best_ratio tracks the best score seen so far - best_ratio, cutoff = 0.74, 0.75 - cruncher = SequenceMatcher(self.charjunk) - eqi, eqj = None, None # 1st indices of equal lines (if any) - - # search for the pair that matches best without being identical - # (identical lines must be junk lines, & we don't want to synch up - # on junk -- unless we have to) - for j in xrange(blo, bhi): - bj = b[j] - cruncher.set_seq2(bj) - for i in xrange(alo, ahi): - ai = a[i] - if ai == bj: - if eqi is None: - eqi, eqj = i, j - continue - cruncher.set_seq1(ai) - # computing similarity is expensive, so use the quick - # upper bounds first -- have seen this speed up messy - # compares by a factor of 3. - # note that ratio() is only expensive to compute the first - # time it's called on a sequence pair; the expensive part - # of the computation is cached by cruncher - if cruncher.real_quick_ratio() > best_ratio and \ - cruncher.quick_ratio() > best_ratio and \ - cruncher.ratio() > best_ratio: - best_ratio, best_i, best_j = cruncher.ratio(), i, j - if best_ratio < cutoff: - # no non-identical "pretty close" pair - if eqi is None: - # no identical pair either -- treat it as a straight replace - for line in self._plain_replace(a, alo, ahi, b, blo, bhi): - yield line - return - # no close pair, but an identical pair -- synch up on that - best_i, best_j, best_ratio = eqi, eqj, 1.0 - else: - # there's a close pair, so forget the identical pair (if any) - eqi = None - - # a[best_i] very similar to b[best_j]; eqi is None iff they're not - # identical - - # pump out diffs from before the synch point - for line in self._fancy_helper(a, alo, best_i, b, blo, best_j): - yield line - - # do intraline marking on the synch pair - aelt, belt = a[best_i], b[best_j] - if eqi is None: - # pump out a '-', '?', '+', '?' 
quad for the synched lines - atags = btags = "" - cruncher.set_seqs(aelt, belt) - for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes(): - la, lb = ai2 - ai1, bj2 - bj1 - if tag == 'replace': - atags += '^' * la - btags += '^' * lb - elif tag == 'delete': - atags += '-' * la - elif tag == 'insert': - btags += '+' * lb - elif tag == 'equal': - atags += ' ' * la - btags += ' ' * lb - else: - raise ValueError, 'unknown tag %r' % (tag,) - for line in self._qformat(aelt, belt, atags, btags): - yield line - else: - # the synch pair is identical - yield ' ' + aelt - - # pump out diffs from after the synch point - for line in self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi): - yield line - - def _fancy_helper(self, a, alo, ahi, b, blo, bhi): - g = [] - if alo < ahi: - if blo < bhi: - g = self._fancy_replace(a, alo, ahi, b, blo, bhi) - else: - g = self._dump('-', a, alo, ahi) - elif blo < bhi: - g = self._dump('+', b, blo, bhi) - - for line in g: - yield line - - def _qformat(self, aline, bline, atags, btags): - r""" - Format "?" output and deal with leading tabs. - - Example: - - >>> d = Differ() - >>> results = d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n', - ... ' ^ ^ ^ ', ' ^ ^ ^ ') - >>> for line in results: print repr(line) - ... - '- \tabcDefghiJkl\n' - '? \t ^ ^ ^\n' - '+ \tabcdefGhijkl\n' - '? \t ^ ^ ^\n' - """ - - # Can hurt, but will probably help most of the time. - common = min(_count_leading(aline, "\t"), - _count_leading(bline, "\t")) - common = min(common, _count_leading(atags[:common], " ")) - common = min(common, _count_leading(btags[:common], " ")) - atags = atags[common:].rstrip() - btags = btags[common:].rstrip() - - yield "- " + aline - if atags: - yield "? %s%s\n" % ("\t" * common, atags) - - yield "+ " + bline - if btags: - yield "? %s%s\n" % ("\t" * common, btags) - -# With respect to junk, an earlier version of ndiff simply refused to -# *start* a match with a junk element. 
The result was cases like this: -# before: private Thread currentThread; -# after: private volatile Thread currentThread; -# If you consider whitespace to be junk, the longest contiguous match -# not starting with junk is "e Thread currentThread". So ndiff reported -# that "e volatil" was inserted between the 't' and the 'e' in "private". -# While an accurate view, to people that's absurd. The current version -# looks for matching blocks that are entirely junk-free, then extends the -# longest one of those as far as possible but only with matching junk. -# So now "currentThread" is matched, then extended to suck up the -# preceding blank; then "private" is matched, and extended to suck up the -# following blank; then "Thread" is matched; and finally ndiff reports -# that "volatile " was inserted before "Thread". The only quibble -# remaining is that perhaps it was really the case that " volatile" -# was inserted after "private". I can live with that . - -import re - -def IS_LINE_JUNK(line, pat=re.compile(r"\s*#?\s*$").match): - r""" - Return 1 for ignorable line: iff `line` is blank or contains a single '#'. - - Examples: - - >>> IS_LINE_JUNK('\n') - True - >>> IS_LINE_JUNK(' # \n') - True - >>> IS_LINE_JUNK('hello\n') - False - """ - - return pat(line) is not None - -def IS_CHARACTER_JUNK(ch, ws=" \t"): - r""" - Return 1 for ignorable character: iff `ch` is a space or tab. 
- - Examples: - - >>> IS_CHARACTER_JUNK(' ') - True - >>> IS_CHARACTER_JUNK('\t') - True - >>> IS_CHARACTER_JUNK('\n') - False - >>> IS_CHARACTER_JUNK('x') - False - """ - - return ch in ws - - -######################################################################## -### Unified Diff -######################################################################## - -def _format_range_unified(start, stop): - 'Convert range to the "ed" format' - # Per the diff spec at http://www.unix.org/single_unix_specification/ - beginning = start + 1 # lines start numbering with one - length = stop - start - if length == 1: - return '{}'.format(beginning) - if not length: - beginning -= 1 # empty ranges begin at line just before the range - return '{},{}'.format(beginning, length) - -def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', - tofiledate='', n=3, lineterm='\n'): - r""" - Compare two sequences of lines; generate the delta as a unified diff. - - Unified diffs are a compact way of showing line changes and a few - lines of context. The number of context lines is set by 'n' which - defaults to three. - - By default, the diff control lines (those with ---, +++, or @@) are - created with a trailing newline. This is helpful so that inputs - created from file.readlines() result in diffs that are suitable for - file.writelines() since both the inputs and outputs have trailing - newlines. - - For inputs that do not have trailing newlines, set the lineterm - argument to "" so that the output will be uniformly newline free. - - The unidiff format normally has a header for filenames and modification - times. Any or all of these may be specified using strings for - 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. - The modification times are normally expressed in the ISO 8601 format. - - Example: - - >>> for line in unified_diff('one two three four'.split(), - ... 'zero one tree four'.split(), 'Original', 'Current', - ... 
'2005-01-26 23:30:50', '2010-04-02 10:20:52', - ... lineterm=''): - ... print line # doctest: +NORMALIZE_WHITESPACE - --- Original 2005-01-26 23:30:50 - +++ Current 2010-04-02 10:20:52 - @@ -1,4 +1,4 @@ - +zero - one - -two - -three - +tree - four - """ - - started = False - for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n): - if not started: - started = True - fromdate = '\t{}'.format(fromfiledate) if fromfiledate else '' - todate = '\t{}'.format(tofiledate) if tofiledate else '' - yield '--- {}{}{}'.format(fromfile, fromdate, lineterm) - yield '+++ {}{}{}'.format(tofile, todate, lineterm) - - first, last = group[0], group[-1] - file1_range = _format_range_unified(first[1], last[2]) - file2_range = _format_range_unified(first[3], last[4]) - yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) - - for tag, i1, i2, j1, j2 in group: - if tag == 'equal': - for line in a[i1:i2]: - yield ' ' + line - continue - if tag in ('replace', 'delete'): - for line in a[i1:i2]: - yield '-' + line - if tag in ('replace', 'insert'): - for line in b[j1:j2]: - yield '+' + line - - -######################################################################## -### Context Diff -######################################################################## - -def _format_range_context(start, stop): - 'Convert range to the "ed" format' - # Per the diff spec at http://www.unix.org/single_unix_specification/ - beginning = start + 1 # lines start numbering with one - length = stop - start - if not length: - beginning -= 1 # empty ranges begin at line just before the range - if length <= 1: - return '{}'.format(beginning) - return '{},{}'.format(beginning, beginning + length - 1) - -# See http://www.unix.org/single_unix_specification/ -def context_diff(a, b, fromfile='', tofile='', - fromfiledate='', tofiledate='', n=3, lineterm='\n'): - r""" - Compare two sequences of lines; generate the delta as a context diff. 
- - Context diffs are a compact way of showing line changes and a few - lines of context. The number of context lines is set by 'n' which - defaults to three. - - By default, the diff control lines (those with *** or ---) are - created with a trailing newline. This is helpful so that inputs - created from file.readlines() result in diffs that are suitable for - file.writelines() since both the inputs and outputs have trailing - newlines. - - For inputs that do not have trailing newlines, set the lineterm - argument to "" so that the output will be uniformly newline free. - - The context diff format normally has a header for filenames and - modification times. Any or all of these may be specified using - strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. - The modification times are normally expressed in the ISO 8601 format. - If not specified, the strings default to blanks. - - Example: - - >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1), - ... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')), - *** Original - --- Current - *************** - *** 1,4 **** - one - ! two - ! three - four - --- 1,4 ---- - + zero - one - ! tree - four - """ - - prefix = dict(insert='+ ', delete='- ', replace='! 
', equal=' ') - started = False - for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n): - if not started: - started = True - fromdate = '\t{}'.format(fromfiledate) if fromfiledate else '' - todate = '\t{}'.format(tofiledate) if tofiledate else '' - yield '*** {}{}{}'.format(fromfile, fromdate, lineterm) - yield '--- {}{}{}'.format(tofile, todate, lineterm) - - first, last = group[0], group[-1] - yield '***************' + lineterm - - file1_range = _format_range_context(first[1], last[2]) - yield '*** {} ****{}'.format(file1_range, lineterm) - - if any(tag in ('replace', 'delete') for tag, _, _, _, _ in group): - for tag, i1, i2, _, _ in group: - if tag != 'insert': - for line in a[i1:i2]: - yield prefix[tag] + line - - file2_range = _format_range_context(first[3], last[4]) - yield '--- {} ----{}'.format(file2_range, lineterm) - - if any(tag in ('replace', 'insert') for tag, _, _, _, _ in group): - for tag, _, _, j1, j2 in group: - if tag != 'delete': - for line in b[j1:j2]: - yield prefix[tag] + line - -def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK): - r""" - Compare `a` and `b` (lists of strings); return a `Differ`-style delta. - - Optional keyword parameters `linejunk` and `charjunk` are for filter - functions (or None): - - - linejunk: A function that should accept a single string argument, and - return true iff the string is junk. The default is None, and is - recommended; as of Python 2.3, an adaptive notion of "noise" lines is - used that does a good job on its own. - - - charjunk: A function that should accept a string of length 1. The - default is module-level function IS_CHARACTER_JUNK, which filters out - whitespace characters (a blank or tab; note: bad idea to include newline - in this!). - - Tools/scripts/ndiff.py is a command-line front-end to this function. - - Example: - - >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1), - ... 'ore\ntree\nemu\n'.splitlines(1)) - >>> print ''.join(diff), - - one - ? ^ - + ore - ? 
^ - - two - - three - ? - - + tree - + emu - """ - return Differ(linejunk, charjunk).compare(a, b) - -def _mdiff(fromlines, tolines, context=None, linejunk=None, - charjunk=IS_CHARACTER_JUNK): - r"""Returns generator yielding marked up from/to side by side differences. - - Arguments: - fromlines -- list of text lines to compared to tolines - tolines -- list of text lines to be compared to fromlines - context -- number of context lines to display on each side of difference, - if None, all from/to text lines will be generated. - linejunk -- passed on to ndiff (see ndiff documentation) - charjunk -- passed on to ndiff (see ndiff documentation) - - This function returns an iterator which returns a tuple: - (from line tuple, to line tuple, boolean flag) - - from/to line tuple -- (line num, line text) - line num -- integer or None (to indicate a context separation) - line text -- original line text with following markers inserted: - '\0+' -- marks start of added text - '\0-' -- marks start of deleted text - '\0^' -- marks start of changed text - '\1' -- marks end of added/deleted/changed text - - boolean flag -- None indicates context separation, True indicates - either "from" or "to" line contains a change, otherwise False. - - This function/iterator was originally developed to generate side by side - file difference for making HTML pages (see HtmlDiff class for example - usage). - - Note, this function utilizes the ndiff function to generate the side by - side difference markup. Optional ndiff arguments may be passed to this - function and they in turn will be passed to ndiff. - """ - import re - - # regular expression for finding intraline change indices - change_re = re.compile('(\++|\-+|\^+)') - - # create the difference iterator to generate the differences - diff_lines_iterator = ndiff(fromlines,tolines,linejunk,charjunk) - - def _make_line(lines, format_key, side, num_lines=[0,0]): - """Returns line of text with user's change markup and line formatting. 
- - lines -- list of lines from the ndiff generator to produce a line of - text from. When producing the line of text to return, the - lines used are removed from this list. - format_key -- '+' return first line in list with "add" markup around - the entire line. - '-' return first line in list with "delete" markup around - the entire line. - '?' return first line in list with add/delete/change - intraline markup (indices obtained from second line) - None return first line in list with no markup - side -- indice into the num_lines list (0=from,1=to) - num_lines -- from/to current line number. This is NOT intended to be a - passed parameter. It is present as a keyword argument to - maintain memory of the current line numbers between calls - of this function. - - Note, this function is purposefully not defined at the module scope so - that data it needs from its parent function (within whose context it - is defined) does not need to be of module scope. - """ - num_lines[side] += 1 - # Handle case where no user markup is to be added, just return line of - # text with user's line format to allow for usage of the line number. - if format_key is None: - return (num_lines[side],lines.pop(0)[2:]) - # Handle case of intraline changes - if format_key == '?': - text, markers = lines.pop(0), lines.pop(0) - # find intraline changes (store change type and indices in tuples) - sub_info = [] - def record_sub_info(match_object,sub_info=sub_info): - sub_info.append([match_object.group(1)[0],match_object.span()]) - return match_object.group(1) - change_re.sub(record_sub_info,markers) - # process each tuple inserting our special marks that won't be - # noticed by an xml/html escaper. 
- for key,(begin,end) in sub_info[::-1]: - text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:] - text = text[2:] - # Handle case of add/delete entire line - else: - text = lines.pop(0)[2:] - # if line of text is just a newline, insert a space so there is - # something for the user to highlight and see. - if not text: - text = ' ' - # insert marks that won't be noticed by an xml/html escaper. - text = '\0' + format_key + text + '\1' - # Return line of text, first allow user's line formatter to do its - # thing (such as adding the line number) then replace the special - # marks with what the user's change markup. - return (num_lines[side],text) - - def _line_iterator(): - """Yields from/to lines of text with a change indication. - - This function is an iterator. It itself pulls lines from a - differencing iterator, processes them and yields them. When it can - it yields both a "from" and a "to" line, otherwise it will yield one - or the other. In addition to yielding the lines of from/to text, a - boolean flag is yielded to indicate if the text line(s) have - differences in them. - - Note, this function is purposefully not defined at the module scope so - that data it needs from its parent function (within whose context it - is defined) does not need to be of module scope. - """ - lines = [] - num_blanks_pending, num_blanks_to_yield = 0, 0 - while True: - # Load up next 4 lines so we can look ahead, create strings which - # are a concatenation of the first character of each of the 4 lines - # so we can do some very readable comparisons. - while len(lines) < 4: - try: - lines.append(diff_lines_iterator.next()) - except StopIteration: - lines.append('X') - s = ''.join([line[0] for line in lines]) - if s.startswith('X'): - # When no more lines, pump out any remaining blank lines so the - # corresponding add/delete lines get a matching blank line so - # all line pairs get yielded at the next level. 
- num_blanks_to_yield = num_blanks_pending - elif s.startswith('-?+?'): - # simple intraline change - yield _make_line(lines,'?',0), _make_line(lines,'?',1), True - continue - elif s.startswith('--++'): - # in delete block, add block coming: we do NOT want to get - # caught up on blank lines yet, just process the delete line - num_blanks_pending -= 1 - yield _make_line(lines,'-',0), None, True - continue - elif s.startswith(('--?+', '--+', '- ')): - # in delete block and see an intraline change or unchanged line - # coming: yield the delete line and then blanks - from_line,to_line = _make_line(lines,'-',0), None - num_blanks_to_yield,num_blanks_pending = num_blanks_pending-1,0 - elif s.startswith('-+?'): - # intraline change - yield _make_line(lines,None,0), _make_line(lines,'?',1), True - continue - elif s.startswith('-?+'): - # intraline change - yield _make_line(lines,'?',0), _make_line(lines,None,1), True - continue - elif s.startswith('-'): - # delete FROM line - num_blanks_pending -= 1 - yield _make_line(lines,'-',0), None, True - continue - elif s.startswith('+--'): - # in add block, delete block coming: we do NOT want to get - # caught up on blank lines yet, just process the add line - num_blanks_pending += 1 - yield None, _make_line(lines,'+',1), True - continue - elif s.startswith(('+ ', '+-')): - # will be leaving an add block: yield blanks then add line - from_line, to_line = None, _make_line(lines,'+',1) - num_blanks_to_yield,num_blanks_pending = num_blanks_pending+1,0 - elif s.startswith('+'): - # inside an add block, yield the add line - num_blanks_pending += 1 - yield None, _make_line(lines,'+',1), True - continue - elif s.startswith(' '): - # unchanged text, yield it to both sides - yield _make_line(lines[:],None,0),_make_line(lines,None,1),False - continue - # Catch up on the blank lines so when we yield the next from/to - # pair, they are lined up. 
- while(num_blanks_to_yield < 0): - num_blanks_to_yield += 1 - yield None,('','\n'),True - while(num_blanks_to_yield > 0): - num_blanks_to_yield -= 1 - yield ('','\n'),None,True - if s.startswith('X'): - raise StopIteration - else: - yield from_line,to_line,True - - def _line_pair_iterator(): - """Yields from/to lines of text with a change indication. - - This function is an iterator. It itself pulls lines from the line - iterator. Its difference from that iterator is that this function - always yields a pair of from/to text lines (with the change - indication). If necessary it will collect single from/to lines - until it has a matching pair from/to pair to yield. - - Note, this function is purposefully not defined at the module scope so - that data it needs from its parent function (within whose context it - is defined) does not need to be of module scope. - """ - line_iterator = _line_iterator() - fromlines,tolines=[],[] - while True: - # Collecting lines of text until we have a from/to pair - while (len(fromlines)==0 or len(tolines)==0): - from_line, to_line, found_diff =line_iterator.next() - if from_line is not None: - fromlines.append((from_line,found_diff)) - if to_line is not None: - tolines.append((to_line,found_diff)) - # Once we have a pair, remove them from the collection and yield it - from_line, fromDiff = fromlines.pop(0) - to_line, to_diff = tolines.pop(0) - yield (from_line,to_line,fromDiff or to_diff) - - # Handle case where user does not want context differencing, just yield - # them up without doing anything else with them. - line_pair_iterator = _line_pair_iterator() - if context is None: - while True: - yield line_pair_iterator.next() - # Handle case where user wants context differencing. We must do some - # storage of lines until we know for sure that they are to be yielded. 
- else: - context += 1 - lines_to_write = 0 - while True: - # Store lines up until we find a difference, note use of a - # circular queue because we only need to keep around what - # we need for context. - index, contextLines = 0, [None]*(context) - found_diff = False - while(found_diff is False): - from_line, to_line, found_diff = line_pair_iterator.next() - i = index % context - contextLines[i] = (from_line, to_line, found_diff) - index += 1 - # Yield lines that we have collected so far, but first yield - # the user's separator. - if index > context: - yield None, None, None - lines_to_write = context - else: - lines_to_write = index - index = 0 - while(lines_to_write): - i = index % context - index += 1 - yield contextLines[i] - lines_to_write -= 1 - # Now yield the context lines after the change - lines_to_write = context-1 - while(lines_to_write): - from_line, to_line, found_diff = line_pair_iterator.next() - # If another change within the context, extend the context - if found_diff: - lines_to_write = context-1 - else: - lines_to_write -= 1 - yield from_line, to_line, found_diff - - -_file_template = """ - - - - - - - - - - - - %(table)s%(legend)s - - -""" - -_styles = """ - table.diff {font-family:Courier; border:medium;} - .diff_header {background-color:#e0e0e0} - td.diff_header {text-align:right} - .diff_next {background-color:#c0c0c0} - .diff_add {background-color:#aaffaa} - .diff_chg {background-color:#ffff77} - .diff_sub {background-color:#ffaaaa}""" - -_table_template = """ - - - - %(header_row)s - -%(data_rows)s -
""" - -_legend = """ - - - - -
Legends
- - - - -
Colors
 Added 
Changed
Deleted
- - - - -
Links
(f)irst change
(n)ext change
(t)op
""" - -class HtmlDiff(object): - """For producing HTML side by side comparison with change highlights. - - This class can be used to create an HTML table (or a complete HTML file - containing the table) showing a side by side, line by line comparison - of text with inter-line and intra-line change highlights. The table can - be generated in either full or contextual difference mode. - - The following methods are provided for HTML generation: - - make_table -- generates HTML for a single side by side table - make_file -- generates complete HTML file with a single side by side table - - See tools/scripts/diff.py for an example usage of this class. - """ - - _file_template = _file_template - _styles = _styles - _table_template = _table_template - _legend = _legend - _default_prefix = 0 - - def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None, - charjunk=IS_CHARACTER_JUNK): - """HtmlDiff instance initializer - - Arguments: - tabsize -- tab stop spacing, defaults to 8. - wrapcolumn -- column number where lines are broken and wrapped, - defaults to None where lines are not wrapped. - linejunk,charjunk -- keyword arguments passed into ndiff() (used to by - HtmlDiff() to generate the side by side HTML differences). See - ndiff() documentation for argument default values and descriptions. - """ - self._tabsize = tabsize - self._wrapcolumn = wrapcolumn - self._linejunk = linejunk - self._charjunk = charjunk - - def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False, - numlines=5): - """Returns HTML file of side by side comparison with change highlights - - Arguments: - fromlines -- list of "from" lines - tolines -- list of "to" lines - fromdesc -- "from" file column header string - todesc -- "to" file column header string - context -- set to True for contextual differences (defaults to False - which shows full differences). - numlines -- number of context lines. When context is set True, - controls number of lines displayed before and after the change. 
- When context is False, controls the number of lines to place - the "next" link anchors before the next change (so click of - "next" link jumps to just before the change). - """ - - return self._file_template % dict( - styles = self._styles, - legend = self._legend, - table = self.make_table(fromlines,tolines,fromdesc,todesc, - context=context,numlines=numlines)) - - def _tab_newline_replace(self,fromlines,tolines): - """Returns from/to line lists with tabs expanded and newlines removed. - - Instead of tab characters being replaced by the number of spaces - needed to fill in to the next tab stop, this function will fill - the space with tab characters. This is done so that the difference - algorithms can identify changes in a file when tabs are replaced by - spaces and vice versa. At the end of the HTML generation, the tab - characters will be replaced with a nonbreakable space. - """ - def expand_tabs(line): - # hide real spaces - line = line.replace(' ','\0') - # expand tabs into spaces - line = line.expandtabs(self._tabsize) - # replace spaces from expanded tabs back into tab characters - # (we'll replace them with markup after we do differencing) - line = line.replace(' ','\t') - return line.replace('\0',' ').rstrip('\n') - fromlines = [expand_tabs(line) for line in fromlines] - tolines = [expand_tabs(line) for line in tolines] - return fromlines,tolines - - def _split_line(self,data_list,line_num,text): - """Builds list of text lines by splitting text lines at wrap point - - This function will determine if the input text line needs to be - wrapped (split) into separate lines. If so, the first wrap point - will be determined and the first line appended to the output - text line list. This function is used recursively to handle - the second part of the split line to further split it. 
- """ - # if blank line or context separator, just add it to the output list - if not line_num: - data_list.append((line_num,text)) - return - - # if line text doesn't need wrapping, just add it to the output list - size = len(text) - max = self._wrapcolumn - if (size <= max) or ((size -(text.count('\0')*3)) <= max): - data_list.append((line_num,text)) - return - - # scan text looking for the wrap point, keeping track if the wrap - # point is inside markers - i = 0 - n = 0 - mark = '' - while n < max and i < size: - if text[i] == '\0': - i += 1 - mark = text[i] - i += 1 - elif text[i] == '\1': - i += 1 - mark = '' - else: - i += 1 - n += 1 - - # wrap point is inside text, break it up into separate lines - line1 = text[:i] - line2 = text[i:] - - # if wrap point is inside markers, place end marker at end of first - # line and start marker at beginning of second line because each - # line will have its own table tag markup around it. - if mark: - line1 = line1 + '\1' - line2 = '\0' + mark + line2 - - # tack on first line onto the output list - data_list.append((line_num,line1)) - - # use this routine again to wrap the remaining text - self._split_line(data_list,'>',line2) - - def _line_wrapper(self,diffs): - """Returns iterator that splits (wraps) mdiff text lines""" - - # pull from/to data and flags from mdiff iterator - for fromdata,todata,flag in diffs: - # check for context separators and pass them through - if flag is None: - yield fromdata,todata,flag - continue - (fromline,fromtext),(toline,totext) = fromdata,todata - # for each from/to line split it at the wrap column to form - # list of text lines. 
- fromlist,tolist = [],[] - self._split_line(fromlist,fromline,fromtext) - self._split_line(tolist,toline,totext) - # yield from/to line in pairs inserting blank lines as - # necessary when one side has more wrapped lines - while fromlist or tolist: - if fromlist: - fromdata = fromlist.pop(0) - else: - fromdata = ('',' ') - if tolist: - todata = tolist.pop(0) - else: - todata = ('',' ') - yield fromdata,todata,flag - - def _collect_lines(self,diffs): - """Collects mdiff output into separate lists - - Before storing the mdiff from/to data into a list, it is converted - into a single line of text with HTML markup. - """ - - fromlist,tolist,flaglist = [],[],[] - # pull from/to data and flags from mdiff style iterator - for fromdata,todata,flag in diffs: - try: - # store HTML markup of the lines into the lists - fromlist.append(self._format_line(0,flag,*fromdata)) - tolist.append(self._format_line(1,flag,*todata)) - except TypeError: - # exceptions occur for lines where context separators go - fromlist.append(None) - tolist.append(None) - flaglist.append(flag) - return fromlist,tolist,flaglist - - def _format_line(self,side,flag,linenum,text): - """Returns HTML markup of "from" / "to" text lines - - side -- 0 or 1 indicating "from" or "to" text - flag -- indicates if difference on line - linenum -- line number (used for line number column) - text -- line text to be marked up - """ - try: - linenum = '%d' % linenum - id = ' id="%s%s"' % (self._prefix[side],linenum) - except TypeError: - # handle blank lines where linenum is '>' or '' - id = '' - # replace those things that would get confused with HTML symbols - text=text.replace("&","&").replace(">",">").replace("<","<") - - # make space non-breakable so they don't get compressed or line wrapped - text = text.replace(' ',' ').rstrip() - - return '%s%s' \ - % (id,linenum,text) - - def _make_prefix(self): - """Create unique anchor prefixes""" - - # Generate a unique anchor prefix so multiple tables - # can exist on the 
same HTML page without conflicts. - fromprefix = "from%d_" % HtmlDiff._default_prefix - toprefix = "to%d_" % HtmlDiff._default_prefix - HtmlDiff._default_prefix += 1 - # store prefixes so line format method has access - self._prefix = [fromprefix,toprefix] - - def _convert_flags(self,fromlist,tolist,flaglist,context,numlines): - """Makes list of "next" links""" - - # all anchor names will be generated using the unique "to" prefix - toprefix = self._prefix[1] - - # process change flags, generating middle column of next anchors/links - next_id = ['']*len(flaglist) - next_href = ['']*len(flaglist) - num_chg, in_change = 0, False - last = 0 - for i,flag in enumerate(flaglist): - if flag: - if not in_change: - in_change = True - last = i - # at the beginning of a change, drop an anchor a few lines - # (the context lines) before the change for the previous - # link - i = max([0,i-numlines]) - next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg) - # at the beginning of a change, drop a link to the next - # change - num_chg += 1 - next_href[last] = 'n' % ( - toprefix,num_chg) - else: - in_change = False - # check for cases where there is no content to avoid exceptions - if not flaglist: - flaglist = [False] - next_id = [''] - next_href = [''] - last = 0 - if context: - fromlist = [' No Differences Found '] - tolist = fromlist - else: - fromlist = tolist = [' Empty File '] - # if not a change on first line, drop a link - if not flaglist[0]: - next_href[0] = 'f' % toprefix - # redo the last link to link to the top - next_href[last] = 't' % (toprefix) - - return fromlist,tolist,flaglist,next_href,next_id - - def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False, - numlines=5): - """Returns HTML table of side by side comparison with change highlights - - Arguments: - fromlines -- list of "from" lines - tolines -- list of "to" lines - fromdesc -- "from" file column header string - todesc -- "to" file column header string - context -- set to True for 
contextual differences (defaults to False - which shows full differences). - numlines -- number of context lines. When context is set True, - controls number of lines displayed before and after the change. - When context is False, controls the number of lines to place - the "next" link anchors before the next change (so click of - "next" link jumps to just before the change). - """ - - # make unique anchor prefixes so that multiple tables may exist - # on the same page without conflict. - self._make_prefix() - - # change tabs to spaces before it gets more difficult after we insert - # markup - fromlines,tolines = self._tab_newline_replace(fromlines,tolines) - - # create diffs iterator which generates side by side from/to data - if context: - context_lines = numlines - else: - context_lines = None - diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk, - charjunk=self._charjunk) - - # set up iterator to wrap lines that exceed desired width - if self._wrapcolumn: - diffs = self._line_wrapper(diffs) - - # collect up from/to lines and flags into lists (also format the lines) - fromlist,tolist,flaglist = self._collect_lines(diffs) - - # process change flags, generating middle column of next anchors/links - fromlist,tolist,flaglist,next_href,next_id = self._convert_flags( - fromlist,tolist,flaglist,context,numlines) - - s = [] - fmt = ' %s%s' + \ - '%s%s\n' - for i in range(len(flaglist)): - if flaglist[i] is None: - # mdiff yields None on separator lines skip the bogus ones - # generated for the first line - if i > 0: - s.append(' \n \n') - else: - s.append( fmt % (next_id[i],next_href[i],fromlist[i], - next_href[i],tolist[i])) - if fromdesc or todesc: - header_row = '%s%s%s%s' % ( - '
', - '%s' % fromdesc, - '
', - '%s' % todesc) - else: - header_row = '' - - table = self._table_template % dict( - data_rows=''.join(s), - header_row=header_row, - prefix=self._prefix[1]) - - return table.replace('\0+',''). \ - replace('\0-',''). \ - replace('\0^',''). \ - replace('\1',''). \ - replace('\t',' ') - -del re - -def restore(delta, which): - r""" - Generate one of the two sequences that generated a delta. - - Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract - lines originating from file 1 or 2 (parameter `which`), stripping off line - prefixes. - - Examples: - - >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1), - ... 'ore\ntree\nemu\n'.splitlines(1)) - >>> diff = list(diff) - >>> print ''.join(restore(diff, 1)), - one - two - three - >>> print ''.join(restore(diff, 2)), - ore - tree - emu - """ - try: - tag = {1: "- ", 2: "+ "}[int(which)] - except KeyError: - raise ValueError, ('unknown delta choice (must be 1 or 2): %r' - % which) - prefixes = (" ", tag) - for line in delta: - if line[:2] in prefixes: - yield line[2:] - -def _test(): - import doctest, difflib - return doctest.testmod(difflib) - -if __name__ == "__main__": - _test() diff --git a/python/Lib/dircache.py b/python/Lib/dircache.py deleted file mode 100755 index 7e4f0b508a..0000000000 --- a/python/Lib/dircache.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Read and cache directory listings. - -The listdir() routine returns a sorted list of the files in a directory, -using a cache to avoid reading the directory more often than necessary. 
-The annotate() routine appends slashes to directories.""" -from warnings import warnpy3k -warnpy3k("the dircache module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -import os - -__all__ = ["listdir", "opendir", "annotate", "reset"] - -cache = {} - -def reset(): - """Reset the cache completely.""" - global cache - cache = {} - -def listdir(path): - """List directory contents, using cache.""" - try: - cached_mtime, list = cache[path] - del cache[path] - except KeyError: - cached_mtime, list = -1, [] - mtime = os.stat(path).st_mtime - if mtime != cached_mtime: - list = os.listdir(path) - list.sort() - cache[path] = mtime, list - return list - -opendir = listdir # XXX backward compatibility - -def annotate(head, list): - """Add '/' suffixes to directories.""" - for i in range(len(list)): - if os.path.isdir(os.path.join(head, list[i])): - list[i] = list[i] + '/' diff --git a/python/Lib/dis.py b/python/Lib/dis.py deleted file mode 100755 index 5aa09c95b6..0000000000 --- a/python/Lib/dis.py +++ /dev/null @@ -1,224 +0,0 @@ -"""Disassembler of Python byte code into mnemonics.""" - -import sys -import types - -from opcode import * -from opcode import __all__ as _opcodes_all - -__all__ = ["dis", "disassemble", "distb", "disco", - "findlinestarts", "findlabels"] + _opcodes_all -del _opcodes_all - -_have_code = (types.MethodType, types.FunctionType, types.CodeType, - types.ClassType, type) - -def dis(x=None): - """Disassemble classes, methods, functions, or code. - - With no argument, disassemble the last traceback. 
- - """ - if x is None: - distb() - return - if isinstance(x, types.InstanceType): - x = x.__class__ - if hasattr(x, 'im_func'): - x = x.im_func - if hasattr(x, 'func_code'): - x = x.func_code - if hasattr(x, '__dict__'): - items = x.__dict__.items() - items.sort() - for name, x1 in items: - if isinstance(x1, _have_code): - print "Disassembly of %s:" % name - try: - dis(x1) - except TypeError, msg: - print "Sorry:", msg - print - elif hasattr(x, 'co_code'): - disassemble(x) - elif isinstance(x, str): - disassemble_string(x) - else: - raise TypeError, \ - "don't know how to disassemble %s objects" % \ - type(x).__name__ - -def distb(tb=None): - """Disassemble a traceback (default: last traceback).""" - if tb is None: - try: - tb = sys.last_traceback - except AttributeError: - raise RuntimeError, "no last traceback to disassemble" - while tb.tb_next: tb = tb.tb_next - disassemble(tb.tb_frame.f_code, tb.tb_lasti) - -def disassemble(co, lasti=-1): - """Disassemble a code object.""" - code = co.co_code - labels = findlabels(code) - linestarts = dict(findlinestarts(co)) - n = len(code) - i = 0 - extended_arg = 0 - free = None - while i < n: - c = code[i] - op = ord(c) - if i in linestarts: - if i > 0: - print - print "%3d" % linestarts[i], - else: - print ' ', - - if i == lasti: print '-->', - else: print ' ', - if i in labels: print '>>', - else: print ' ', - print repr(i).rjust(4), - print opname[op].ljust(20), - i = i+1 - if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg - extended_arg = 0 - i = i+2 - if op == EXTENDED_ARG: - extended_arg = oparg*65536L - print repr(oparg).rjust(5), - if op in hasconst: - print '(' + repr(co.co_consts[oparg]) + ')', - elif op in hasname: - print '(' + co.co_names[oparg] + ')', - elif op in hasjrel: - print '(to ' + repr(i + oparg) + ')', - elif op in haslocal: - print '(' + co.co_varnames[oparg] + ')', - elif op in hascompare: - print '(' + cmp_op[oparg] + ')', - elif op in hasfree: - if free is None: 
- free = co.co_cellvars + co.co_freevars - print '(' + free[oparg] + ')', - print - -def disassemble_string(code, lasti=-1, varnames=None, names=None, - constants=None): - labels = findlabels(code) - n = len(code) - i = 0 - while i < n: - c = code[i] - op = ord(c) - if i == lasti: print '-->', - else: print ' ', - if i in labels: print '>>', - else: print ' ', - print repr(i).rjust(4), - print opname[op].ljust(15), - i = i+1 - if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 - i = i+2 - print repr(oparg).rjust(5), - if op in hasconst: - if constants: - print '(' + repr(constants[oparg]) + ')', - else: - print '(%d)'%oparg, - elif op in hasname: - if names is not None: - print '(' + names[oparg] + ')', - else: - print '(%d)'%oparg, - elif op in hasjrel: - print '(to ' + repr(i + oparg) + ')', - elif op in haslocal: - if varnames: - print '(' + varnames[oparg] + ')', - else: - print '(%d)' % oparg, - elif op in hascompare: - print '(' + cmp_op[oparg] + ')', - print - -disco = disassemble # XXX For backwards compatibility - -def findlabels(code): - """Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - """ - labels = [] - n = len(code) - i = 0 - while i < n: - c = code[i] - op = ord(c) - i = i+1 - if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 - i = i+2 - label = -1 - if op in hasjrel: - label = i+oparg - elif op in hasjabs: - label = oparg - if label >= 0: - if label not in labels: - labels.append(label) - return labels - -def findlinestarts(code): - """Find the offsets in a byte code which are start of lines in the source. - - Generate pairs (offset, lineno) as described in Python/compile.c. 
- - """ - byte_increments = [ord(c) for c in code.co_lnotab[0::2]] - line_increments = [ord(c) for c in code.co_lnotab[1::2]] - - lastlineno = None - lineno = code.co_firstlineno - addr = 0 - for byte_incr, line_incr in zip(byte_increments, line_increments): - if byte_incr: - if lineno != lastlineno: - yield (addr, lineno) - lastlineno = lineno - addr += byte_incr - lineno += line_incr - if lineno != lastlineno: - yield (addr, lineno) - -def _test(): - """Simple test program to disassemble a file.""" - if sys.argv[1:]: - if sys.argv[2:]: - sys.stderr.write("usage: python dis.py [-|file]\n") - sys.exit(2) - fn = sys.argv[1] - if not fn or fn == "-": - fn = None - else: - fn = None - if fn is None: - f = sys.stdin - else: - f = open(fn) - source = f.read() - if fn is not None: - f.close() - else: - fn = "" - code = compile(source, fn, "exec") - dis(code) - -if __name__ == "__main__": - _test() diff --git a/python/Lib/doctest.py b/python/Lib/doctest.py deleted file mode 100755 index 4dac3b317f..0000000000 --- a/python/Lib/doctest.py +++ /dev/null @@ -1,2817 +0,0 @@ -# Module doctest. -# Released to the public domain 16-Jan-2001, by Tim Peters (tim@python.org). -# Major enhancements and refactoring by: -# Jim Fulton -# Edward Loper - -# Provided as-is; use at your own risk; no warranty; no promises; enjoy! - -r"""Module doctest -- a framework for running examples in docstrings. - -In simplest use, end each module M to be tested with: - -def _test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": - _test() - -Then running the module as a script will cause the examples in the -docstrings to get executed and verified: - -python M.py - -This won't display anything unless an example fails, in which case the -failing example(s) and the cause(s) of the failure(s) are printed to stdout -(why not stderr? because stderr is a lame hack <0.2 wink>), and the final -line of output is "Test failed.". 
- -Run it with the -v switch instead: - -python M.py -v - -and a detailed report of all examples tried is printed to stdout, along -with assorted summaries at the end. - -You can force verbose mode by passing "verbose=True" to testmod, or prohibit -it by passing "verbose=False". In either of those cases, sys.argv is not -examined by testmod. - -There are a variety of other ways to run doctests, including integration -with the unittest framework, and support for running non-Python text -files containing doctests. There are also many ways to override parts -of doctest's default behaviors. See the Library Reference Manual for -details. -""" - -__docformat__ = 'reStructuredText en' - -__all__ = [ - # 0, Option Flags - 'register_optionflag', - 'DONT_ACCEPT_TRUE_FOR_1', - 'DONT_ACCEPT_BLANKLINE', - 'NORMALIZE_WHITESPACE', - 'ELLIPSIS', - 'SKIP', - 'IGNORE_EXCEPTION_DETAIL', - 'COMPARISON_FLAGS', - 'REPORT_UDIFF', - 'REPORT_CDIFF', - 'REPORT_NDIFF', - 'REPORT_ONLY_FIRST_FAILURE', - 'REPORTING_FLAGS', - # 1. Utility Functions - # 2. Example & DocTest - 'Example', - 'DocTest', - # 3. Doctest Parser - 'DocTestParser', - # 4. Doctest Finder - 'DocTestFinder', - # 5. Doctest Runner - 'DocTestRunner', - 'OutputChecker', - 'DocTestFailure', - 'UnexpectedException', - 'DebugRunner', - # 6. Test Functions - 'testmod', - 'testfile', - 'run_docstring_examples', - # 7. Tester - 'Tester', - # 8. Unittest Support - 'DocTestSuite', - 'DocFileSuite', - 'set_unittest_reportflags', - # 9. Debugging Support - 'script_from_examples', - 'testsource', - 'debug_src', - 'debug', -] - -import __future__ - -import sys, traceback, inspect, linecache, os, re -import unittest, difflib, pdb, tempfile -import warnings -from StringIO import StringIO -from collections import namedtuple - -TestResults = namedtuple('TestResults', 'failed attempted') - -# There are 4 basic classes: -# - Example: a pair, plus an intra-docstring line number. 
-# - DocTest: a collection of examples, parsed from a docstring, plus -# info about where the docstring came from (name, filename, lineno). -# - DocTestFinder: extracts DocTests from a given object's docstring and -# its contained objects' docstrings. -# - DocTestRunner: runs DocTest cases, and accumulates statistics. -# -# So the basic picture is: -# -# list of: -# +------+ +---------+ +-------+ -# |object| --DocTestFinder-> | DocTest | --DocTestRunner-> |results| -# +------+ +---------+ +-------+ -# | Example | -# | ... | -# | Example | -# +---------+ - -# Option constants. - -OPTIONFLAGS_BY_NAME = {} -def register_optionflag(name): - # Create a new flag unless `name` is already known. - return OPTIONFLAGS_BY_NAME.setdefault(name, 1 << len(OPTIONFLAGS_BY_NAME)) - -DONT_ACCEPT_TRUE_FOR_1 = register_optionflag('DONT_ACCEPT_TRUE_FOR_1') -DONT_ACCEPT_BLANKLINE = register_optionflag('DONT_ACCEPT_BLANKLINE') -NORMALIZE_WHITESPACE = register_optionflag('NORMALIZE_WHITESPACE') -ELLIPSIS = register_optionflag('ELLIPSIS') -SKIP = register_optionflag('SKIP') -IGNORE_EXCEPTION_DETAIL = register_optionflag('IGNORE_EXCEPTION_DETAIL') - -COMPARISON_FLAGS = (DONT_ACCEPT_TRUE_FOR_1 | - DONT_ACCEPT_BLANKLINE | - NORMALIZE_WHITESPACE | - ELLIPSIS | - SKIP | - IGNORE_EXCEPTION_DETAIL) - -REPORT_UDIFF = register_optionflag('REPORT_UDIFF') -REPORT_CDIFF = register_optionflag('REPORT_CDIFF') -REPORT_NDIFF = register_optionflag('REPORT_NDIFF') -REPORT_ONLY_FIRST_FAILURE = register_optionflag('REPORT_ONLY_FIRST_FAILURE') - -REPORTING_FLAGS = (REPORT_UDIFF | - REPORT_CDIFF | - REPORT_NDIFF | - REPORT_ONLY_FIRST_FAILURE) - -# Special string markers for use in `want` strings: -BLANKLINE_MARKER = '' -ELLIPSIS_MARKER = '...' - -###################################################################### -## Table of Contents -###################################################################### -# 1. Utility Functions -# 2. Example & DocTest -- store test cases -# 3. 
DocTest Parser -- extracts examples from strings -# 4. DocTest Finder -- extracts test cases from objects -# 5. DocTest Runner -- runs test cases -# 6. Test Functions -- convenient wrappers for testing -# 7. Tester Class -- for backwards compatibility -# 8. Unittest Support -# 9. Debugging Support -# 10. Example Usage - -###################################################################### -## 1. Utility Functions -###################################################################### - -def _extract_future_flags(globs): - """ - Return the compiler-flags associated with the future features that - have been imported into the given namespace (globs). - """ - flags = 0 - for fname in __future__.all_feature_names: - feature = globs.get(fname, None) - if feature is getattr(__future__, fname): - flags |= feature.compiler_flag - return flags - -def _normalize_module(module, depth=2): - """ - Return the module specified by `module`. In particular: - - If `module` is a module, then return module. - - If `module` is a string, then import and return the - module with that name. - - If `module` is None, then return the calling module. - The calling module is assumed to be the module of - the stack frame at the given depth in the call stack. - """ - if inspect.ismodule(module): - return module - elif isinstance(module, (str, unicode)): - return __import__(module, globals(), locals(), ["*"]) - elif module is None: - return sys.modules[sys._getframe(depth).f_globals['__name__']] - else: - raise TypeError("Expected a module, string, or None") - -def _load_testfile(filename, package, module_relative): - if module_relative: - package = _normalize_module(package, 3) - filename = _module_relative_path(package, filename) - if hasattr(package, '__loader__'): - if hasattr(package.__loader__, 'get_data'): - file_contents = package.__loader__.get_data(filename) - # get_data() opens files as 'rb', so one must do the equivalent - # conversion as universal newlines would do. 
- return file_contents.replace(os.linesep, '\n'), filename - with open(filename, 'U') as f: - return f.read(), filename - -# Use sys.stdout encoding for output. -_encoding = getattr(sys.__stdout__, 'encoding', None) or 'utf-8' - -def _indent(s, indent=4): - """ - Add the given number of space characters to the beginning of - every non-blank line in `s`, and return the result. - If the string `s` is Unicode, it is encoded using the stdout - encoding and the `backslashreplace` error handler. - """ - if isinstance(s, unicode): - s = s.encode(_encoding, 'backslashreplace') - # This regexp matches the start of non-blank lines: - return re.sub('(?m)^(?!$)', indent*' ', s) - -def _exception_traceback(exc_info): - """ - Return a string containing a traceback message for the given - exc_info tuple (as returned by sys.exc_info()). - """ - # Get a traceback message. - excout = StringIO() - exc_type, exc_val, exc_tb = exc_info - traceback.print_exception(exc_type, exc_val, exc_tb, file=excout) - return excout.getvalue() - -# Override some StringIO methods. -class _SpoofOut(StringIO): - def getvalue(self): - result = StringIO.getvalue(self) - # If anything at all was written, make sure there's a trailing - # newline. There's no way for the expected output to indicate - # that a trailing newline is missing. - if result and not result.endswith("\n"): - result += "\n" - # Prevent softspace from screwing up the next test case, in - # case they used print with a trailing comma in an example. - if hasattr(self, "softspace"): - del self.softspace - return result - - def truncate(self, size=None): - StringIO.truncate(self, size) - if hasattr(self, "softspace"): - del self.softspace - if not self.buf: - # Reset it to an empty string, to make sure it's not unicode. - self.buf = '' - -# Worst-case linear-time ellipsis matching. 
-def _ellipsis_match(want, got): - """ - Essentially the only subtle case: - >>> _ellipsis_match('aa...aa', 'aaa') - False - """ - if ELLIPSIS_MARKER not in want: - return want == got - - # Find "the real" strings. - ws = want.split(ELLIPSIS_MARKER) - assert len(ws) >= 2 - - # Deal with exact matches possibly needed at one or both ends. - startpos, endpos = 0, len(got) - w = ws[0] - if w: # starts with exact match - if got.startswith(w): - startpos = len(w) - del ws[0] - else: - return False - w = ws[-1] - if w: # ends with exact match - if got.endswith(w): - endpos -= len(w) - del ws[-1] - else: - return False - - if startpos > endpos: - # Exact end matches required more characters than we have, as in - # _ellipsis_match('aa...aa', 'aaa') - return False - - # For the rest, we only need to find the leftmost non-overlapping - # match for each piece. If there's no overall match that way alone, - # there's no overall match period. - for w in ws: - # w may be '' at times, if there are consecutive ellipses, or - # due to an ellipsis at the start or end of `want`. That's OK. - # Search for an empty string succeeds, and doesn't change startpos. - startpos = got.find(w, startpos, endpos) - if startpos < 0: - return False - startpos += len(w) - - return True - -def _comment_line(line): - "Return a commented form of the given line" - line = line.rstrip() - if line: - return '# '+line - else: - return '#' - -def _strip_exception_details(msg): - # Support for IGNORE_EXCEPTION_DETAIL. - # Get rid of everything except the exception name; in particular, drop - # the possibly dotted module path (if any) and the exception message (if - # any). We assume that a colon is never part of a dotted name, or of an - # exception name. - # E.g., given - # "foo.bar.MyError: la di da" - # return "MyError" - # Or for "abc.def" or "abc.def:\n" return "def". - - start, end = 0, len(msg) - # The exception name must appear on the first line. 
- i = msg.find("\n") - if i >= 0: - end = i - # retain up to the first colon (if any) - i = msg.find(':', 0, end) - if i >= 0: - end = i - # retain just the exception name - i = msg.rfind('.', 0, end) - if i >= 0: - start = i+1 - return msg[start: end] - -class _OutputRedirectingPdb(pdb.Pdb): - """ - A specialized version of the python debugger that redirects stdout - to a given stream when interacting with the user. Stdout is *not* - redirected when traced code is executed. - """ - def __init__(self, out): - self.__out = out - self.__debugger_used = False - pdb.Pdb.__init__(self, stdout=out) - # still use input() to get user input - self.use_rawinput = 1 - - def set_trace(self, frame=None): - self.__debugger_used = True - if frame is None: - frame = sys._getframe().f_back - pdb.Pdb.set_trace(self, frame) - - def set_continue(self): - # Calling set_continue unconditionally would break unit test - # coverage reporting, as Bdb.set_continue calls sys.settrace(None). - if self.__debugger_used: - pdb.Pdb.set_continue(self) - - def trace_dispatch(self, *args): - # Redirect stdout to the given stream. - save_stdout = sys.stdout - sys.stdout = self.__out - # Call Pdb's trace dispatch method. - try: - return pdb.Pdb.trace_dispatch(self, *args) - finally: - sys.stdout = save_stdout - -# [XX] Normalize with respect to os.path.pardir? -def _module_relative_path(module, path): - if not inspect.ismodule(module): - raise TypeError, 'Expected a module: %r' % module - if path.startswith('/'): - raise ValueError, 'Module-relative files may not have absolute paths' - - # Find the base directory for the path. - if hasattr(module, '__file__'): - # A normal module/package - basedir = os.path.split(module.__file__)[0] - elif module.__name__ == '__main__': - # An interactive session. 
- if len(sys.argv)>0 and sys.argv[0] != '': - basedir = os.path.split(sys.argv[0])[0] - else: - basedir = os.curdir - else: - # A module w/o __file__ (this includes builtins) - raise ValueError("Can't resolve paths relative to the module " + - module + " (it has no __file__)") - - # Combine the base directory and the path. - return os.path.join(basedir, *(path.split('/'))) - -###################################################################### -## 2. Example & DocTest -###################################################################### -## - An "example" is a pair, where "source" is a -## fragment of source code, and "want" is the expected output for -## "source." The Example class also includes information about -## where the example was extracted from. -## -## - A "doctest" is a collection of examples, typically extracted from -## a string (such as an object's docstring). The DocTest class also -## includes information about where the string was extracted from. - -class Example: - """ - A single doctest example, consisting of source code and expected - output. `Example` defines the following attributes: - - - source: A single Python statement, always ending with a newline. - The constructor adds a newline if needed. - - - want: The expected output from running the source code (either - from stdout, or a traceback in case of exception). `want` ends - with a newline unless it's empty, in which case it's an empty - string. The constructor adds a newline if needed. - - - exc_msg: The exception message generated by the example, if - the example is expected to generate an exception; or `None` if - it is not expected to generate an exception. This exception - message is compared against the return value of - `traceback.format_exception_only()`. `exc_msg` ends with a - newline unless it's `None`. The constructor adds a newline - if needed. - - - lineno: The line number within the DocTest string containing - this Example where the Example begins. 
This line number is - zero-based, with respect to the beginning of the DocTest. - - - indent: The example's indentation in the DocTest string. - I.e., the number of space characters that precede the - example's first prompt. - - - options: A dictionary mapping from option flags to True or - False, which is used to override default options for this - example. Any option flags not contained in this dictionary - are left at their default value (as specified by the - DocTestRunner's optionflags). By default, no options are set. - """ - def __init__(self, source, want, exc_msg=None, lineno=0, indent=0, - options=None): - # Normalize inputs. - if not source.endswith('\n'): - source += '\n' - if want and not want.endswith('\n'): - want += '\n' - if exc_msg is not None and not exc_msg.endswith('\n'): - exc_msg += '\n' - # Store properties. - self.source = source - self.want = want - self.lineno = lineno - self.indent = indent - if options is None: options = {} - self.options = options - self.exc_msg = exc_msg - - def __eq__(self, other): - if type(self) is not type(other): - return NotImplemented - - return self.source == other.source and \ - self.want == other.want and \ - self.lineno == other.lineno and \ - self.indent == other.indent and \ - self.options == other.options and \ - self.exc_msg == other.exc_msg - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self.source, self.want, self.lineno, self.indent, - self.exc_msg)) - - -class DocTest: - """ - A collection of doctest examples that should be run in a single - namespace. Each `DocTest` defines the following attributes: - - - examples: the list of examples. - - - globs: The namespace (aka globals) that the examples should - be run in. - - - name: A name identifying the DocTest (typically, the name of - the object whose docstring this DocTest was extracted from). 
- - - filename: The name of the file that this DocTest was extracted - from, or `None` if the filename is unknown. - - - lineno: The line number within filename where this DocTest - begins, or `None` if the line number is unavailable. This - line number is zero-based, with respect to the beginning of - the file. - - - docstring: The string that the examples were extracted from, - or `None` if the string is unavailable. - """ - def __init__(self, examples, globs, name, filename, lineno, docstring): - """ - Create a new DocTest containing the given examples. The - DocTest's globals are initialized with a copy of `globs`. - """ - assert not isinstance(examples, basestring), \ - "DocTest no longer accepts str; use DocTestParser instead" - self.examples = examples - self.docstring = docstring - self.globs = globs.copy() - self.name = name - self.filename = filename - self.lineno = lineno - - def __repr__(self): - if len(self.examples) == 0: - examples = 'no examples' - elif len(self.examples) == 1: - examples = '1 example' - else: - examples = '%d examples' % len(self.examples) - return ('' % - (self.name, self.filename, self.lineno, examples)) - - def __eq__(self, other): - if type(self) is not type(other): - return NotImplemented - - return self.examples == other.examples and \ - self.docstring == other.docstring and \ - self.globs == other.globs and \ - self.name == other.name and \ - self.filename == other.filename and \ - self.lineno == other.lineno - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self.docstring, self.name, self.filename, self.lineno)) - - # This lets us sort tests by name: - def __cmp__(self, other): - if not isinstance(other, DocTest): - return -1 - return cmp((self.name, self.filename, self.lineno, id(self)), - (other.name, other.filename, other.lineno, id(other))) - -###################################################################### -## 3. 
DocTestParser -###################################################################### - -class DocTestParser: - """ - A class used to parse strings containing doctest examples. - """ - # This regular expression is used to find doctest examples in a - # string. It defines three groups: `source` is the source code - # (including leading indentation and prompts); `indent` is the - # indentation of the first (PS1) line of the source code; and - # `want` is the expected output (including leading indentation). - _EXAMPLE_RE = re.compile(r''' - # Source consists of a PS1 line followed by zero or more PS2 lines. - (?P - (?:^(?P [ ]*) >>> .*) # PS1 line - (?:\n [ ]* \.\.\. .*)*) # PS2 lines - \n? - # Want consists of any non-blank lines that do not start with PS1. - (?P (?:(?![ ]*$) # Not a blank line - (?![ ]*>>>) # Not a line starting with PS1 - .+$\n? # But any other line - )*) - ''', re.MULTILINE | re.VERBOSE) - - # A regular expression for handling `want` strings that contain - # expected exceptions. It divides `want` into three pieces: - # - the traceback header line (`hdr`) - # - the traceback stack (`stack`) - # - the exception message (`msg`), as generated by - # traceback.format_exception_only() - # `msg` may have multiple lines. We assume/require that the - # exception message is the first non-indented line starting with a word - # character following the traceback header line. - _EXCEPTION_RE = re.compile(r""" - # Grab the traceback header. Different versions of Python have - # said different things on the first traceback line. - ^(?P Traceback\ \( - (?: most\ recent\ call\ last - | innermost\ last - ) \) : - ) - \s* $ # toss trailing whitespace on the header. - (?P .*?) # don't blink: absorb stuff until... - ^ (?P \w+ .*) # a line *starts* with alphanum. - """, re.VERBOSE | re.MULTILINE | re.DOTALL) - - # A callable returning a true value iff its argument is a blank line - # or contains a single comment. 
- _IS_BLANK_OR_COMMENT = re.compile(r'^[ ]*(#.*)?$').match - - def parse(self, string, name=''): - """ - Divide the given string into examples and intervening text, - and return them as a list of alternating Examples and strings. - Line numbers for the Examples are 0-based. The optional - argument `name` is a name identifying this string, and is only - used for error messages. - """ - string = string.expandtabs() - # If all lines begin with the same indentation, then strip it. - min_indent = self._min_indent(string) - if min_indent > 0: - string = '\n'.join([l[min_indent:] for l in string.split('\n')]) - - output = [] - charno, lineno = 0, 0 - # Find all doctest examples in the string: - for m in self._EXAMPLE_RE.finditer(string): - # Add the pre-example text to `output`. - output.append(string[charno:m.start()]) - # Update lineno (lines before this example) - lineno += string.count('\n', charno, m.start()) - # Extract info from the regexp match. - (source, options, want, exc_msg) = \ - self._parse_example(m, name, lineno) - # Create an Example, and add it to the list. - if not self._IS_BLANK_OR_COMMENT(source): - output.append( Example(source, want, exc_msg, - lineno=lineno, - indent=min_indent+len(m.group('indent')), - options=options) ) - # Update lineno (lines inside this example) - lineno += string.count('\n', m.start(), m.end()) - # Update charno. - charno = m.end() - # Add any remaining post-example text to `output`. - output.append(string[charno:]) - return output - - def get_doctest(self, string, globs, name, filename, lineno): - """ - Extract all doctest examples from the given string, and - collect them into a `DocTest` object. - - `globs`, `name`, `filename`, and `lineno` are attributes for - the new `DocTest` object. See the documentation for `DocTest` - for more information. 
- """ - return DocTest(self.get_examples(string, name), globs, - name, filename, lineno, string) - - def get_examples(self, string, name=''): - """ - Extract all doctest examples from the given string, and return - them as a list of `Example` objects. Line numbers are - 0-based, because it's most common in doctests that nothing - interesting appears on the same line as opening triple-quote, - and so the first interesting line is called \"line 1\" then. - - The optional argument `name` is a name identifying this - string, and is only used for error messages. - """ - return [x for x in self.parse(string, name) - if isinstance(x, Example)] - - def _parse_example(self, m, name, lineno): - """ - Given a regular expression match from `_EXAMPLE_RE` (`m`), - return a pair `(source, want)`, where `source` is the matched - example's source code (with prompts and indentation stripped); - and `want` is the example's expected output (with indentation - stripped). - - `name` is the string's name, and `lineno` is the line number - where the example starts; both are used for error messages. - """ - # Get the example's indentation level. - indent = len(m.group('indent')) - - # Divide source into lines; check that they're properly - # indented; and then strip their indentation & prompts. - source_lines = m.group('source').split('\n') - self._check_prompt_blank(source_lines, indent, name, lineno) - self._check_prefix(source_lines[1:], ' '*indent + '.', name, lineno) - source = '\n'.join([sl[indent+4:] for sl in source_lines]) - - # Divide want into lines; check that it's properly indented; and - # then strip the indentation. Spaces before the last newline should - # be preserved, so plain rstrip() isn't good enough. 
- want = m.group('want') - want_lines = want.split('\n') - if len(want_lines) > 1 and re.match(r' *$', want_lines[-1]): - del want_lines[-1] # forget final newline & spaces after it - self._check_prefix(want_lines, ' '*indent, name, - lineno + len(source_lines)) - want = '\n'.join([wl[indent:] for wl in want_lines]) - - # If `want` contains a traceback message, then extract it. - m = self._EXCEPTION_RE.match(want) - if m: - exc_msg = m.group('msg') - else: - exc_msg = None - - # Extract options from the source. - options = self._find_options(source, name, lineno) - - return source, options, want, exc_msg - - # This regular expression looks for option directives in the - # source code of an example. Option directives are comments - # starting with "doctest:". Warning: this may give false - # positives for string-literals that contain the string - # "#doctest:". Eliminating these false positives would require - # actually parsing the string; but we limit them by ignoring any - # line containing "#doctest:" that is *followed* by a quote mark. - _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$', - re.MULTILINE) - - def _find_options(self, source, name, lineno): - """ - Return a dictionary containing option overrides extracted from - option directives in the given source string. - - `name` is the string's name, and `lineno` is the line number - where the example starts; both are used for error messages. 
- """ - options = {} - # (note: with the current regexp, this will match at most once:) - for m in self._OPTION_DIRECTIVE_RE.finditer(source): - option_strings = m.group(1).replace(',', ' ').split() - for option in option_strings: - if (option[0] not in '+-' or - option[1:] not in OPTIONFLAGS_BY_NAME): - raise ValueError('line %r of the doctest for %s ' - 'has an invalid option: %r' % - (lineno+1, name, option)) - flag = OPTIONFLAGS_BY_NAME[option[1:]] - options[flag] = (option[0] == '+') - if options and self._IS_BLANK_OR_COMMENT(source): - raise ValueError('line %r of the doctest for %s has an option ' - 'directive on a line with no example: %r' % - (lineno, name, source)) - return options - - # This regular expression finds the indentation of every non-blank - # line in a string. - _INDENT_RE = re.compile('^([ ]*)(?=\S)', re.MULTILINE) - - def _min_indent(self, s): - "Return the minimum indentation of any non-blank line in `s`" - indents = [len(indent) for indent in self._INDENT_RE.findall(s)] - if len(indents) > 0: - return min(indents) - else: - return 0 - - def _check_prompt_blank(self, lines, indent, name, lineno): - """ - Given the lines of a source string (including prompts and - leading indentation), check to make sure that every prompt is - followed by a space character. If any line is not followed by - a space character, then raise ValueError. - """ - for i, line in enumerate(lines): - if len(line) >= indent+4 and line[indent+3] != ' ': - raise ValueError('line %r of the docstring for %s ' - 'lacks blank after %s: %r' % - (lineno+i+1, name, - line[indent:indent+3], line)) - - def _check_prefix(self, lines, prefix, name, lineno): - """ - Check that every line in the given list starts with the given - prefix; if any line does not, then raise a ValueError. 
- """ - for i, line in enumerate(lines): - if line and not line.startswith(prefix): - raise ValueError('line %r of the docstring for %s has ' - 'inconsistent leading whitespace: %r' % - (lineno+i+1, name, line)) - - -###################################################################### -## 4. DocTest Finder -###################################################################### - -class DocTestFinder: - """ - A class used to extract the DocTests that are relevant to a given - object, from its docstring and the docstrings of its contained - objects. Doctests can currently be extracted from the following - object types: modules, functions, classes, methods, staticmethods, - classmethods, and properties. - """ - - def __init__(self, verbose=False, parser=DocTestParser(), - recurse=True, exclude_empty=True): - """ - Create a new doctest finder. - - The optional argument `parser` specifies a class or - function that should be used to create new DocTest objects (or - objects that implement the same interface as DocTest). The - signature for this factory function should match the signature - of the DocTest constructor. - - If the optional argument `recurse` is false, then `find` will - only examine the given object, and not any contained objects. - - If the optional argument `exclude_empty` is false, then `find` - will include tests for objects with empty docstrings. - """ - self._parser = parser - self._verbose = verbose - self._recurse = recurse - self._exclude_empty = exclude_empty - - def find(self, obj, name=None, module=None, globs=None, extraglobs=None): - """ - Return a list of the DocTests that are defined by the given - object's docstring, or by any of its contained objects' - docstrings. - - The optional parameter `module` is the module that contains - the given object. If the module is not specified or is None, then - the test finder will attempt to automatically determine the - correct module. 
The object's module is used: - - - As a default namespace, if `globs` is not specified. - - To prevent the DocTestFinder from extracting DocTests - from objects that are imported from other modules. - - To find the name of the file containing the object. - - To help find the line number of the object within its - file. - - Contained objects whose module does not match `module` are ignored. - - If `module` is False, no attempt to find the module will be made. - This is obscure, of use mostly in tests: if `module` is False, or - is None but cannot be found automatically, then all objects are - considered to belong to the (non-existent) module, so all contained - objects will (recursively) be searched for doctests. - - The globals for each DocTest is formed by combining `globs` - and `extraglobs` (bindings in `extraglobs` override bindings - in `globs`). A new copy of the globals dictionary is created - for each DocTest. If `globs` is not specified, then it - defaults to the module's `__dict__`, if specified, or {} - otherwise. If `extraglobs` is not specified, then it defaults - to {}. - - """ - # If name was not specified, then extract it from the object. - if name is None: - name = getattr(obj, '__name__', None) - if name is None: - raise ValueError("DocTestFinder.find: name must be given " - "when obj.__name__ doesn't exist: %r" % - (type(obj),)) - - # Find the module that contains the given object (if obj is - # a module, then module=obj.). Note: this may fail, in which - # case module will be None. - if module is False: - module = None - elif module is None: - module = inspect.getmodule(obj) - - # Read the module's source code. This is used by - # DocTestFinder._find_lineno to find the line number for a - # given object's docstring. 
- try: - file = inspect.getsourcefile(obj) or inspect.getfile(obj) - if module is not None: - # Supply the module globals in case the module was - # originally loaded via a PEP 302 loader and - # file is not a valid filesystem path - source_lines = linecache.getlines(file, module.__dict__) - else: - # No access to a loader, so assume it's a normal - # filesystem path - source_lines = linecache.getlines(file) - if not source_lines: - source_lines = None - except TypeError: - source_lines = None - - # Initialize globals, and merge in extraglobs. - if globs is None: - if module is None: - globs = {} - else: - globs = module.__dict__.copy() - else: - globs = globs.copy() - if extraglobs is not None: - globs.update(extraglobs) - if '__name__' not in globs: - globs['__name__'] = '__main__' # provide a default module name - - # Recursively explore `obj`, extracting DocTests. - tests = [] - self._find(tests, obj, name, module, source_lines, globs, {}) - # Sort the tests by alpha order of names, for consistency in - # verbose-mode output. This was a feature of doctest in Pythons - # <= 2.3 that got lost by accident in 2.4. It was repaired in - # 2.4.4 and 2.5. - tests.sort() - return tests - - def _from_module(self, module, object): - """ - Return true if the given object is defined in the given - module. - """ - if module is None: - return True - elif inspect.getmodule(object) is not None: - return module is inspect.getmodule(object) - elif inspect.isfunction(object): - return module.__dict__ is object.func_globals - elif inspect.isclass(object): - return module.__name__ == object.__module__ - elif hasattr(object, '__module__'): - return module.__name__ == object.__module__ - elif isinstance(object, property): - return True # [XX] no way not be sure. 
- else: - raise ValueError("object must be a class or function") - - def _find(self, tests, obj, name, module, source_lines, globs, seen): - """ - Find tests for the given object and any contained objects, and - add them to `tests`. - """ - if self._verbose: - print 'Finding tests in %s' % name - - # If we've already processed this object, then ignore it. - if id(obj) in seen: - return - seen[id(obj)] = 1 - - # Find a test for this object, and add it to the list of tests. - test = self._get_test(obj, name, module, globs, source_lines) - if test is not None: - tests.append(test) - - # Look for tests in a module's contained objects. - if inspect.ismodule(obj) and self._recurse: - for valname, val in obj.__dict__.items(): - valname = '%s.%s' % (name, valname) - # Recurse to functions & classes. - if ((inspect.isfunction(val) or inspect.isclass(val)) and - self._from_module(module, val)): - self._find(tests, val, valname, module, source_lines, - globs, seen) - - # Look for tests in a module's __test__ dictionary. - if inspect.ismodule(obj) and self._recurse: - for valname, val in getattr(obj, '__test__', {}).items(): - if not isinstance(valname, basestring): - raise ValueError("DocTestFinder.find: __test__ keys " - "must be strings: %r" % - (type(valname),)) - if not (inspect.isfunction(val) or inspect.isclass(val) or - inspect.ismethod(val) or inspect.ismodule(val) or - isinstance(val, basestring)): - raise ValueError("DocTestFinder.find: __test__ values " - "must be strings, functions, methods, " - "classes, or modules: %r" % - (type(val),)) - valname = '%s.__test__.%s' % (name, valname) - self._find(tests, val, valname, module, source_lines, - globs, seen) - - # Look for tests in a class's contained objects. - if inspect.isclass(obj) and self._recurse: - for valname, val in obj.__dict__.items(): - # Special handling for staticmethod/classmethod. 
- if isinstance(val, staticmethod): - val = getattr(obj, valname) - if isinstance(val, classmethod): - val = getattr(obj, valname).im_func - - # Recurse to methods, properties, and nested classes. - if ((inspect.isfunction(val) or inspect.isclass(val) or - isinstance(val, property)) and - self._from_module(module, val)): - valname = '%s.%s' % (name, valname) - self._find(tests, val, valname, module, source_lines, - globs, seen) - - def _get_test(self, obj, name, module, globs, source_lines): - """ - Return a DocTest for the given object, if it defines a docstring; - otherwise, return None. - """ - # Extract the object's docstring. If it doesn't have one, - # then return None (no test for this object). - if isinstance(obj, basestring): - docstring = obj - else: - try: - if obj.__doc__ is None: - docstring = '' - else: - docstring = obj.__doc__ - if not isinstance(docstring, basestring): - docstring = str(docstring) - except (TypeError, AttributeError): - docstring = '' - - # Find the docstring's location in the file. - lineno = self._find_lineno(obj, source_lines) - - # Don't bother if the docstring is empty. - if self._exclude_empty and not docstring: - return None - - # Return a DocTest for this object. - if module is None: - filename = None - else: - filename = getattr(module, '__file__', module.__name__) - if filename[-4:] in (".pyc", ".pyo"): - filename = filename[:-1] - return self._parser.get_doctest(docstring, globs, name, - filename, lineno) - - def _find_lineno(self, obj, source_lines): - """ - Return a line number of the given object's docstring. Note: - this method assumes that the object has a docstring. - """ - lineno = None - - # Find the line number for modules. - if inspect.ismodule(obj): - lineno = 0 - - # Find the line number for classes. - # Note: this could be fooled if a class is defined multiple - # times in a single file. 
- if inspect.isclass(obj): - if source_lines is None: - return None - pat = re.compile(r'^\s*class\s*%s\b' % - getattr(obj, '__name__', '-')) - for i, line in enumerate(source_lines): - if pat.match(line): - lineno = i - break - - # Find the line number for functions & methods. - if inspect.ismethod(obj): obj = obj.im_func - if inspect.isfunction(obj): obj = obj.func_code - if inspect.istraceback(obj): obj = obj.tb_frame - if inspect.isframe(obj): obj = obj.f_code - if inspect.iscode(obj): - lineno = getattr(obj, 'co_firstlineno', None)-1 - - # Find the line number where the docstring starts. Assume - # that it's the first line that begins with a quote mark. - # Note: this could be fooled by a multiline function - # signature, where a continuation line begins with a quote - # mark. - if lineno is not None: - if source_lines is None: - return lineno+1 - pat = re.compile('(^|.*:)\s*\w*("|\')') - for lineno in range(lineno, len(source_lines)): - if pat.match(source_lines[lineno]): - return lineno - - # We couldn't find the line number. - return None - -###################################################################### -## 5. DocTest Runner -###################################################################### - -class DocTestRunner: - """ - A class used to run DocTest test cases, and accumulate statistics. - The `run` method is used to process a single DocTest case. It - returns a tuple `(f, t)`, where `t` is the number of test cases - tried, and `f` is the number of test cases that failed. - - >>> tests = DocTestFinder().find(_TestClass) - >>> runner = DocTestRunner(verbose=False) - >>> tests.sort(key = lambda test: test.name) - >>> for test in tests: - ... 
print test.name, '->', runner.run(test) - _TestClass -> TestResults(failed=0, attempted=2) - _TestClass.__init__ -> TestResults(failed=0, attempted=2) - _TestClass.get -> TestResults(failed=0, attempted=2) - _TestClass.square -> TestResults(failed=0, attempted=1) - - The `summarize` method prints a summary of all the test cases that - have been run by the runner, and returns an aggregated `(f, t)` - tuple: - - >>> runner.summarize(verbose=1) - 4 items passed all tests: - 2 tests in _TestClass - 2 tests in _TestClass.__init__ - 2 tests in _TestClass.get - 1 tests in _TestClass.square - 7 tests in 4 items. - 7 passed and 0 failed. - Test passed. - TestResults(failed=0, attempted=7) - - The aggregated number of tried examples and failed examples is - also available via the `tries` and `failures` attributes: - - >>> runner.tries - 7 - >>> runner.failures - 0 - - The comparison between expected outputs and actual outputs is done - by an `OutputChecker`. This comparison may be customized with a - number of option flags; see the documentation for `testmod` for - more information. If the option flags are insufficient, then the - comparison may also be customized by passing a subclass of - `OutputChecker` to the constructor. - - The test runner's display output can be controlled in two ways. - First, an output function (`out) can be passed to - `TestRunner.run`; this function will be called with strings that - should be displayed. It defaults to `sys.stdout.write`. If - capturing the output is not sufficient, then the display output - can be also customized by subclassing DocTestRunner, and - overriding the methods `report_start`, `report_success`, - `report_unexpected_exception`, and `report_failure`. - """ - # This divider string is used to separate failure messages, and to - # separate sections of the summary. - DIVIDER = "*" * 70 - - def __init__(self, checker=None, verbose=None, optionflags=0): - """ - Create a new test runner. 
- - Optional keyword arg `checker` is the `OutputChecker` that - should be used to compare the expected outputs and actual - outputs of doctest examples. - - Optional keyword arg 'verbose' prints lots of stuff if true, - only failures if false; by default, it's true iff '-v' is in - sys.argv. - - Optional argument `optionflags` can be used to control how the - test runner compares expected output to actual output, and how - it displays failures. See the documentation for `testmod` for - more information. - """ - self._checker = checker or OutputChecker() - if verbose is None: - verbose = '-v' in sys.argv - self._verbose = verbose - self.optionflags = optionflags - self.original_optionflags = optionflags - - # Keep track of the examples we've run. - self.tries = 0 - self.failures = 0 - self._name2ft = {} - - # Create a fake output target for capturing doctest output. - self._fakeout = _SpoofOut() - - #///////////////////////////////////////////////////////////////// - # Reporting methods - #///////////////////////////////////////////////////////////////// - - def report_start(self, out, test, example): - """ - Report that the test runner is about to process the given - example. (Only displays a message if verbose=True) - """ - if self._verbose: - if example.want: - out('Trying:\n' + _indent(example.source) + - 'Expecting:\n' + _indent(example.want)) - else: - out('Trying:\n' + _indent(example.source) + - 'Expecting nothing\n') - - def report_success(self, out, test, example, got): - """ - Report that the given example ran successfully. (Only - displays a message if verbose=True) - """ - if self._verbose: - out("ok\n") - - def report_failure(self, out, test, example, got): - """ - Report that the given example failed. 
- """ - out(self._failure_header(test, example) + - self._checker.output_difference(example, got, self.optionflags)) - - def report_unexpected_exception(self, out, test, example, exc_info): - """ - Report that the given example raised an unexpected exception. - """ - out(self._failure_header(test, example) + - 'Exception raised:\n' + _indent(_exception_traceback(exc_info))) - - def _failure_header(self, test, example): - out = [self.DIVIDER] - if test.filename: - if test.lineno is not None and example.lineno is not None: - lineno = test.lineno + example.lineno + 1 - else: - lineno = '?' - out.append('File "%s", line %s, in %s' % - (test.filename, lineno, test.name)) - else: - out.append('Line %s, in %s' % (example.lineno+1, test.name)) - out.append('Failed example:') - source = example.source - out.append(_indent(source)) - return '\n'.join(out) - - #///////////////////////////////////////////////////////////////// - # DocTest Running - #///////////////////////////////////////////////////////////////// - - def __run(self, test, compileflags, out): - """ - Run the examples in `test`. Write the outcome of each example - with one of the `DocTestRunner.report_*` methods, using the - writer function `out`. `compileflags` is the set of compiler - flags that should be used to execute examples. Return a tuple - `(f, t)`, where `t` is the number of examples tried, and `f` - is the number of examples that failed. The examples are run - in the namespace `test.globs`. - """ - # Keep track of the number of failures and tries. - failures = tries = 0 - - # Save the option flags (since option directives can be used - # to modify them). - original_optionflags = self.optionflags - - SUCCESS, FAILURE, BOOM = range(3) # `outcome` state - - check = self._checker.check_output - - # Process each example. - for examplenum, example in enumerate(test.examples): - - # If REPORT_ONLY_FIRST_FAILURE is set, then suppress - # reporting after the first failure. 
- quiet = (self.optionflags & REPORT_ONLY_FIRST_FAILURE and - failures > 0) - - # Merge in the example's options. - self.optionflags = original_optionflags - if example.options: - for (optionflag, val) in example.options.items(): - if val: - self.optionflags |= optionflag - else: - self.optionflags &= ~optionflag - - # If 'SKIP' is set, then skip this example. - if self.optionflags & SKIP: - continue - - # Record that we started this example. - tries += 1 - if not quiet: - self.report_start(out, test, example) - - # Use a special filename for compile(), so we can retrieve - # the source code during interactive debugging (see - # __patched_linecache_getlines). - filename = '' % (test.name, examplenum) - - # Run the example in the given context (globs), and record - # any exception that gets raised. (But don't intercept - # keyboard interrupts.) - try: - # Don't blink! This is where the user's code gets run. - exec compile(example.source, filename, "single", - compileflags, 1) in test.globs - self.debugger.set_continue() # ==== Example Finished ==== - exception = None - except KeyboardInterrupt: - raise - except: - exception = sys.exc_info() - self.debugger.set_continue() # ==== Example Finished ==== - - got = self._fakeout.getvalue() # the actual output - self._fakeout.truncate(0) - outcome = FAILURE # guilty until proved innocent or insane - - # If the example executed without raising any exceptions, - # verify its output. - if exception is None: - if check(example.want, got, self.optionflags): - outcome = SUCCESS - - # The example raised an exception: check if it was expected. - else: - exc_info = sys.exc_info() - exc_msg = traceback.format_exception_only(*exc_info[:2])[-1] - if not quiet: - got += _exception_traceback(exc_info) - - # If `example.exc_msg` is None, then we weren't expecting - # an exception. - if example.exc_msg is None: - outcome = BOOM - - # We expected an exception: see whether it matches. 
- elif check(example.exc_msg, exc_msg, self.optionflags): - outcome = SUCCESS - - # Another chance if they didn't care about the detail. - elif self.optionflags & IGNORE_EXCEPTION_DETAIL: - if check(_strip_exception_details(example.exc_msg), - _strip_exception_details(exc_msg), - self.optionflags): - outcome = SUCCESS - - # Report the outcome. - if outcome is SUCCESS: - if not quiet: - self.report_success(out, test, example, got) - elif outcome is FAILURE: - if not quiet: - self.report_failure(out, test, example, got) - failures += 1 - elif outcome is BOOM: - if not quiet: - self.report_unexpected_exception(out, test, example, - exc_info) - failures += 1 - else: - assert False, ("unknown outcome", outcome) - - # Restore the option flags (in case they were modified) - self.optionflags = original_optionflags - - # Record and return the number of failures and tries. - self.__record_outcome(test, failures, tries) - return TestResults(failures, tries) - - def __record_outcome(self, test, f, t): - """ - Record the fact that the given DocTest (`test`) generated `f` - failures out of `t` tried examples. - """ - f2, t2 = self._name2ft.get(test.name, (0,0)) - self._name2ft[test.name] = (f+f2, t+t2) - self.failures += f - self.tries += t - - __LINECACHE_FILENAME_RE = re.compile(r'.+)' - r'\[(?P\d+)\]>$') - def __patched_linecache_getlines(self, filename, module_globals=None): - m = self.__LINECACHE_FILENAME_RE.match(filename) - if m and m.group('name') == self.test.name: - example = self.test.examples[int(m.group('examplenum'))] - source = example.source - if isinstance(source, unicode): - source = source.encode('ascii', 'backslashreplace') - return source.splitlines(True) - else: - return self.save_linecache_getlines(filename, module_globals) - - def run(self, test, compileflags=None, out=None, clear_globs=True): - """ - Run the examples in `test`, and display the results using the - writer function `out`. - - The examples are run in the namespace `test.globs`. 
If - `clear_globs` is true (the default), then this namespace will - be cleared after the test runs, to help with garbage - collection. If you would like to examine the namespace after - the test completes, then use `clear_globs=False`. - - `compileflags` gives the set of flags that should be used by - the Python compiler when running the examples. If not - specified, then it will default to the set of future-import - flags that apply to `globs`. - - The output of each example is checked using - `DocTestRunner.check_output`, and the results are formatted by - the `DocTestRunner.report_*` methods. - """ - self.test = test - - if compileflags is None: - compileflags = _extract_future_flags(test.globs) - - save_stdout = sys.stdout - if out is None: - out = save_stdout.write - sys.stdout = self._fakeout - - # Patch pdb.set_trace to restore sys.stdout during interactive - # debugging (so it's not still redirected to self._fakeout). - # Note that the interactive output will go to *our* - # save_stdout, even if that's not the real sys.stdout; this - # allows us to write test cases for the set_trace behavior. - save_set_trace = pdb.set_trace - self.debugger = _OutputRedirectingPdb(save_stdout) - self.debugger.reset() - pdb.set_trace = self.debugger.set_trace - - # Patch linecache.getlines, so we can see the example's source - # when we're inside the debugger. 
- self.save_linecache_getlines = linecache.getlines - linecache.getlines = self.__patched_linecache_getlines - - # Make sure sys.displayhook just prints the value to stdout - save_displayhook = sys.displayhook - sys.displayhook = sys.__displayhook__ - - try: - return self.__run(test, compileflags, out) - finally: - sys.stdout = save_stdout - pdb.set_trace = save_set_trace - linecache.getlines = self.save_linecache_getlines - sys.displayhook = save_displayhook - if clear_globs: - test.globs.clear() - - #///////////////////////////////////////////////////////////////// - # Summarization - #///////////////////////////////////////////////////////////////// - def summarize(self, verbose=None): - """ - Print a summary of all the test cases that have been run by - this DocTestRunner, and return a tuple `(f, t)`, where `f` is - the total number of failed examples, and `t` is the total - number of tried examples. - - The optional `verbose` argument controls how detailed the - summary is. If the verbosity is not specified, then the - DocTestRunner's verbosity is used. - """ - if verbose is None: - verbose = self._verbose - notests = [] - passed = [] - failed = [] - totalt = totalf = 0 - for x in self._name2ft.items(): - name, (f, t) = x - assert f <= t - totalt += t - totalf += f - if t == 0: - notests.append(name) - elif f == 0: - passed.append( (name, t) ) - else: - failed.append(x) - if verbose: - if notests: - print len(notests), "items had no tests:" - notests.sort() - for thing in notests: - print " ", thing - if passed: - print len(passed), "items passed all tests:" - passed.sort() - for thing, count in passed: - print " %3d tests in %s" % (count, thing) - if failed: - print self.DIVIDER - print len(failed), "items had failures:" - failed.sort() - for thing, (f, t) in failed: - print " %3d of %3d in %s" % (f, t, thing) - if verbose: - print totalt, "tests in", len(self._name2ft), "items." - print totalt - totalf, "passed and", totalf, "failed." 
- if totalf: - print "***Test Failed***", totalf, "failures." - elif verbose: - print "Test passed." - return TestResults(totalf, totalt) - - #///////////////////////////////////////////////////////////////// - # Backward compatibility cruft to maintain doctest.master. - #///////////////////////////////////////////////////////////////// - def merge(self, other): - d = self._name2ft - for name, (f, t) in other._name2ft.items(): - if name in d: - # Don't print here by default, since doing - # so breaks some of the buildbots - #print "*** DocTestRunner.merge: '" + name + "' in both" \ - # " testers; summing outcomes." - f2, t2 = d[name] - f = f + f2 - t = t + t2 - d[name] = f, t - -class OutputChecker: - """ - A class used to check the whether the actual output from a doctest - example matches the expected output. `OutputChecker` defines two - methods: `check_output`, which compares a given pair of outputs, - and returns true if they match; and `output_difference`, which - returns a string describing the differences between two outputs. - """ - def check_output(self, want, got, optionflags): - """ - Return True iff the actual output from an example (`got`) - matches the expected output (`want`). These strings are - always considered to match if they are identical; but - depending on what option flags the test runner is using, - several non-exact match types are also possible. See the - documentation for `TestRunner` for more information about - option flags. - """ - # Handle the common case first, for efficiency: - # if they're string-identical, always return true. - if got == want: - return True - - # The values True and False replaced 1 and 0 as the return - # value for boolean comparisons in Python 2.3. 
- if not (optionflags & DONT_ACCEPT_TRUE_FOR_1): - if (got,want) == ("True\n", "1\n"): - return True - if (got,want) == ("False\n", "0\n"): - return True - - # can be used as a special sequence to signify a - # blank line, unless the DONT_ACCEPT_BLANKLINE flag is used. - if not (optionflags & DONT_ACCEPT_BLANKLINE): - # Replace in want with a blank line. - want = re.sub('(?m)^%s\s*?$' % re.escape(BLANKLINE_MARKER), - '', want) - # If a line in got contains only spaces, then remove the - # spaces. - got = re.sub('(?m)^\s*?$', '', got) - if got == want: - return True - - # This flag causes doctest to ignore any differences in the - # contents of whitespace strings. Note that this can be used - # in conjunction with the ELLIPSIS flag. - if optionflags & NORMALIZE_WHITESPACE: - got = ' '.join(got.split()) - want = ' '.join(want.split()) - if got == want: - return True - - # The ELLIPSIS flag says to let the sequence "..." in `want` - # match any substring in `got`. - if optionflags & ELLIPSIS: - if _ellipsis_match(want, got): - return True - - # We didn't find any match; return false. - return False - - # Should we do a fancy diff? - def _do_a_fancy_diff(self, want, got, optionflags): - # Not unless they asked for a fancy diff. - if not optionflags & (REPORT_UDIFF | - REPORT_CDIFF | - REPORT_NDIFF): - return False - - # If expected output uses ellipsis, a meaningful fancy diff is - # too hard ... or maybe not. In two real-life failures Tim saw, - # a diff was a major help anyway, so this is commented out. - # [todo] _ellipsis_match() knows which pieces do and don't match, - # and could be the basis for a kick-ass diff in this case. - ##if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want: - ## return False - - # ndiff does intraline difference marking, so can be useful even - # for 1-line differences. - if optionflags & REPORT_NDIFF: - return True - - # The other diff types need at least a few lines to be helpful. 
- return want.count('\n') > 2 and got.count('\n') > 2 - - def output_difference(self, example, got, optionflags): - """ - Return a string describing the differences between the - expected output for a given example (`example`) and the actual - output (`got`). `optionflags` is the set of option flags used - to compare `want` and `got`. - """ - want = example.want - # If s are being used, then replace blank lines - # with in the actual output string. - if not (optionflags & DONT_ACCEPT_BLANKLINE): - got = re.sub('(?m)^[ ]*(?=\n)', BLANKLINE_MARKER, got) - - # Check if we should use diff. - if self._do_a_fancy_diff(want, got, optionflags): - # Split want & got into lines. - want_lines = want.splitlines(True) # True == keep line ends - got_lines = got.splitlines(True) - # Use difflib to find their differences. - if optionflags & REPORT_UDIFF: - diff = difflib.unified_diff(want_lines, got_lines, n=2) - diff = list(diff)[2:] # strip the diff header - kind = 'unified diff with -expected +actual' - elif optionflags & REPORT_CDIFF: - diff = difflib.context_diff(want_lines, got_lines, n=2) - diff = list(diff)[2:] # strip the diff header - kind = 'context diff with expected followed by actual' - elif optionflags & REPORT_NDIFF: - engine = difflib.Differ(charjunk=difflib.IS_CHARACTER_JUNK) - diff = list(engine.compare(want_lines, got_lines)) - kind = 'ndiff with -expected +actual' - else: - assert 0, 'Bad diff option' - # Remove trailing whitespace on diff output. - diff = [line.rstrip() + '\n' for line in diff] - return 'Differences (%s):\n' % kind + _indent(''.join(diff)) - - # If we're not using diff, then simply list the expected - # output followed by the actual output. 
- if want and got: - return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got)) - elif want: - return 'Expected:\n%sGot nothing\n' % _indent(want) - elif got: - return 'Expected nothing\nGot:\n%s' % _indent(got) - else: - return 'Expected nothing\nGot nothing\n' - -class DocTestFailure(Exception): - """A DocTest example has failed in debugging mode. - - The exception instance has variables: - - - test: the DocTest object being run - - - example: the Example object that failed - - - got: the actual output - """ - def __init__(self, test, example, got): - self.test = test - self.example = example - self.got = got - - def __str__(self): - return str(self.test) - -class UnexpectedException(Exception): - """A DocTest example has encountered an unexpected exception - - The exception instance has variables: - - - test: the DocTest object being run - - - example: the Example object that failed - - - exc_info: the exception info - """ - def __init__(self, test, example, exc_info): - self.test = test - self.example = example - self.exc_info = exc_info - - def __str__(self): - return str(self.test) - -class DebugRunner(DocTestRunner): - r"""Run doc tests but raise an exception as soon as there is a failure. - - If an unexpected exception occurs, an UnexpectedException is raised. - It contains the test, the example, and the original exception: - - >>> runner = DebugRunner(verbose=False) - >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42', - ... {}, 'foo', 'foo.py', 0) - >>> try: - ... runner.run(test) - ... except UnexpectedException, failure: - ... pass - - >>> failure.test is test - True - - >>> failure.example.want - '42\n' - - >>> exc_info = failure.exc_info - >>> raise exc_info[0], exc_info[1], exc_info[2] - Traceback (most recent call last): - ... - KeyError - - We wrap the original exception to give the calling application - access to the test and example information. 
- - If the output doesn't match, then a DocTestFailure is raised: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 1 - ... >>> x - ... 2 - ... ''', {}, 'foo', 'foo.py', 0) - - >>> try: - ... runner.run(test) - ... except DocTestFailure, failure: - ... pass - - DocTestFailure objects provide access to the test: - - >>> failure.test is test - True - - As well as to the example: - - >>> failure.example.want - '2\n' - - and the actual output: - - >>> failure.got - '1\n' - - If a failure or error occurs, the globals are left intact: - - >>> del test.globs['__builtins__'] - >>> test.globs - {'x': 1} - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 2 - ... >>> raise KeyError - ... ''', {}, 'foo', 'foo.py', 0) - - >>> runner.run(test) - Traceback (most recent call last): - ... - UnexpectedException: - - >>> del test.globs['__builtins__'] - >>> test.globs - {'x': 2} - - But the globals are cleared if there is no error: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 2 - ... ''', {}, 'foo', 'foo.py', 0) - - >>> runner.run(test) - TestResults(failed=0, attempted=1) - - >>> test.globs - {} - - """ - - def run(self, test, compileflags=None, out=None, clear_globs=True): - r = DocTestRunner.run(self, test, compileflags, out, False) - if clear_globs: - test.globs.clear() - return r - - def report_unexpected_exception(self, out, test, example, exc_info): - raise UnexpectedException(test, example, exc_info) - - def report_failure(self, out, test, example, got): - raise DocTestFailure(test, example, got) - -###################################################################### -## 6. Test Functions -###################################################################### -# These should be backwards compatible. - -# For backward compatibility, a global instance of a DocTestRunner -# class, updated by testmod. 
-master = None - -def testmod(m=None, name=None, globs=None, verbose=None, - report=True, optionflags=0, extraglobs=None, - raise_on_error=False, exclude_empty=False): - """m=None, name=None, globs=None, verbose=None, report=True, - optionflags=0, extraglobs=None, raise_on_error=False, - exclude_empty=False - - Test examples in docstrings in functions and classes reachable - from module m (or the current module if m is not supplied), starting - with m.__doc__. - - Also test examples reachable from dict m.__test__ if it exists and is - not None. m.__test__ maps names to functions, classes and strings; - function and class docstrings are tested even if the name is private; - strings are tested directly, as if they were docstrings. - - Return (#failures, #tests). - - See help(doctest) for an overview. - - Optional keyword arg "name" gives the name of the module; by default - use m.__name__. - - Optional keyword arg "globs" gives a dict to be used as the globals - when executing examples; by default, use m.__dict__. A copy of this - dict is actually used for each docstring, so that each docstring's - examples start with a clean slate. - - Optional keyword arg "extraglobs" gives a dictionary that should be - merged into the globals that are used to execute examples. By - default, no extra globals are used. This is new in 2.4. - - Optional keyword arg "verbose" prints lots of stuff if true, prints - only failures if false; by default, it's true iff "-v" is in sys.argv. - - Optional keyword arg "report" prints a summary at the end when true, - else prints nothing at the end. In verbose mode, the summary is - detailed, else very brief (in fact, empty if all tests passed). - - Optional keyword arg "optionflags" or's together module constants, - and defaults to 0. This is new in 2.3. 
Possible values (see the - docs for details): - - DONT_ACCEPT_TRUE_FOR_1 - DONT_ACCEPT_BLANKLINE - NORMALIZE_WHITESPACE - ELLIPSIS - SKIP - IGNORE_EXCEPTION_DETAIL - REPORT_UDIFF - REPORT_CDIFF - REPORT_NDIFF - REPORT_ONLY_FIRST_FAILURE - - Optional keyword arg "raise_on_error" raises an exception on the - first unexpected exception or failure. This allows failures to be - post-mortem debugged. - - Advanced tomfoolery: testmod runs methods of a local instance of - class doctest.Tester, then merges the results into (or creates) - global Tester instance doctest.master. Methods of doctest.master - can be called directly too, if you want to do something unusual. - Passing report=0 to testmod is especially useful then, to delay - displaying a summary. Invoke doctest.master.summarize(verbose) - when you're done fiddling. - """ - global master - - # If no module was given, then use __main__. - if m is None: - # DWA - m will still be None if this wasn't invoked from the command - # line, in which case the following TypeError is about as good an error - # as we should expect - m = sys.modules.get('__main__') - - # Check that we were actually given a module. - if not inspect.ismodule(m): - raise TypeError("testmod: module required; %r" % (m,)) - - # If no name was given, then use the module's name. - if name is None: - name = m.__name__ - - # Find, parse, and run all tests in the given module. 
- finder = DocTestFinder(exclude_empty=exclude_empty) - - if raise_on_error: - runner = DebugRunner(verbose=verbose, optionflags=optionflags) - else: - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - - for test in finder.find(m, name, globs=globs, extraglobs=extraglobs): - runner.run(test) - - if report: - runner.summarize() - - if master is None: - master = runner - else: - master.merge(runner) - - return TestResults(runner.failures, runner.tries) - -def testfile(filename, module_relative=True, name=None, package=None, - globs=None, verbose=None, report=True, optionflags=0, - extraglobs=None, raise_on_error=False, parser=DocTestParser(), - encoding=None): - """ - Test examples in the given file. Return (#failures, #tests). - - Optional keyword arg "module_relative" specifies how filenames - should be interpreted: - - - If "module_relative" is True (the default), then "filename" - specifies a module-relative path. By default, this path is - relative to the calling module's directory; but if the - "package" argument is specified, then it is relative to that - package. To ensure os-independence, "filename" should use - "/" characters to separate path segments, and should not - be an absolute path (i.e., it may not begin with "/"). - - - If "module_relative" is False, then "filename" specifies an - os-specific path. The path may be absolute or relative (to - the current working directory). - - Optional keyword arg "name" gives the name of the test; by default - use the file's basename. - - Optional keyword argument "package" is a Python package or the - name of a Python package whose directory should be used as the - base directory for a module relative filename. If no package is - specified, then the calling module's directory is used as the base - directory for module relative filenames. It is an error to - specify "package" if "module_relative" is False. 
- - Optional keyword arg "globs" gives a dict to be used as the globals - when executing examples; by default, use {}. A copy of this dict - is actually used for each docstring, so that each docstring's - examples start with a clean slate. - - Optional keyword arg "extraglobs" gives a dictionary that should be - merged into the globals that are used to execute examples. By - default, no extra globals are used. - - Optional keyword arg "verbose" prints lots of stuff if true, prints - only failures if false; by default, it's true iff "-v" is in sys.argv. - - Optional keyword arg "report" prints a summary at the end when true, - else prints nothing at the end. In verbose mode, the summary is - detailed, else very brief (in fact, empty if all tests passed). - - Optional keyword arg "optionflags" or's together module constants, - and defaults to 0. Possible values (see the docs for details): - - DONT_ACCEPT_TRUE_FOR_1 - DONT_ACCEPT_BLANKLINE - NORMALIZE_WHITESPACE - ELLIPSIS - SKIP - IGNORE_EXCEPTION_DETAIL - REPORT_UDIFF - REPORT_CDIFF - REPORT_NDIFF - REPORT_ONLY_FIRST_FAILURE - - Optional keyword arg "raise_on_error" raises an exception on the - first unexpected exception or failure. This allows failures to be - post-mortem debugged. - - Optional keyword arg "parser" specifies a DocTestParser (or - subclass) that should be used to extract tests from the files. - - Optional keyword arg "encoding" specifies an encoding that should - be used to convert the file to unicode. - - Advanced tomfoolery: testmod runs methods of a local instance of - class doctest.Tester, then merges the results into (or creates) - global Tester instance doctest.master. Methods of doctest.master - can be called directly too, if you want to do something unusual. - Passing report=0 to testmod is especially useful then, to delay - displaying a summary. Invoke doctest.master.summarize(verbose) - when you're done fiddling. 
- """ - global master - - if package and not module_relative: - raise ValueError("Package may only be specified for module-" - "relative paths.") - - # Relativize the path - text, filename = _load_testfile(filename, package, module_relative) - - # If no name was given, then use the file's name. - if name is None: - name = os.path.basename(filename) - - # Assemble the globals. - if globs is None: - globs = {} - else: - globs = globs.copy() - if extraglobs is not None: - globs.update(extraglobs) - if '__name__' not in globs: - globs['__name__'] = '__main__' - - if raise_on_error: - runner = DebugRunner(verbose=verbose, optionflags=optionflags) - else: - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - - if encoding is not None: - text = text.decode(encoding) - - # Read the file, convert it to a test, and run it. - test = parser.get_doctest(text, globs, name, filename, 0) - runner.run(test) - - if report: - runner.summarize() - - if master is None: - master = runner - else: - master.merge(runner) - - return TestResults(runner.failures, runner.tries) - -def run_docstring_examples(f, globs, verbose=False, name="NoName", - compileflags=None, optionflags=0): - """ - Test examples in the given object's docstring (`f`), using `globs` - as globals. Optional argument `name` is used in failure messages. - If the optional argument `verbose` is true, then generate output - even if there are no failures. - - `compileflags` gives the set of flags that should be used by the - Python compiler when running the examples. If not specified, then - it will default to the set of future-import flags that apply to - `globs`. - - Optional keyword arg `optionflags` specifies options for the - testing and output. See the documentation for `testmod` for more - information. - """ - # Find, parse, and run all tests in the given module. 
- finder = DocTestFinder(verbose=verbose, recurse=False) - runner = DocTestRunner(verbose=verbose, optionflags=optionflags) - for test in finder.find(f, name, globs=globs): - runner.run(test, compileflags=compileflags) - -###################################################################### -## 7. Tester -###################################################################### -# This is provided only for backwards compatibility. It's not -# actually used in any way. - -class Tester: - def __init__(self, mod=None, globs=None, verbose=None, optionflags=0): - - warnings.warn("class Tester is deprecated; " - "use class doctest.DocTestRunner instead", - DeprecationWarning, stacklevel=2) - if mod is None and globs is None: - raise TypeError("Tester.__init__: must specify mod or globs") - if mod is not None and not inspect.ismodule(mod): - raise TypeError("Tester.__init__: mod must be a module; %r" % - (mod,)) - if globs is None: - globs = mod.__dict__ - self.globs = globs - - self.verbose = verbose - self.optionflags = optionflags - self.testfinder = DocTestFinder() - self.testrunner = DocTestRunner(verbose=verbose, - optionflags=optionflags) - - def runstring(self, s, name): - test = DocTestParser().get_doctest(s, self.globs, name, None, None) - if self.verbose: - print "Running string", name - (f,t) = self.testrunner.run(test) - if self.verbose: - print f, "of", t, "examples failed in string", name - return TestResults(f,t) - - def rundoc(self, object, name=None, module=None): - f = t = 0 - tests = self.testfinder.find(object, name, module=module, - globs=self.globs) - for test in tests: - (f2, t2) = self.testrunner.run(test) - (f,t) = (f+f2, t+t2) - return TestResults(f,t) - - def rundict(self, d, name, module=None): - import types - m = types.ModuleType(name) - m.__dict__.update(d) - if module is None: - module = False - return self.rundoc(m, name, module) - - def run__test__(self, d, name): - import types - m = types.ModuleType(name) - m.__test__ = d - return 
self.rundoc(m, name) - - def summarize(self, verbose=None): - return self.testrunner.summarize(verbose) - - def merge(self, other): - self.testrunner.merge(other.testrunner) - -###################################################################### -## 8. Unittest Support -###################################################################### - -_unittest_reportflags = 0 - -def set_unittest_reportflags(flags): - """Sets the unittest option flags. - - The old flag is returned so that a runner could restore the old - value if it wished to: - - >>> import doctest - >>> old = doctest._unittest_reportflags - >>> doctest.set_unittest_reportflags(REPORT_NDIFF | - ... REPORT_ONLY_FIRST_FAILURE) == old - True - - >>> doctest._unittest_reportflags == (REPORT_NDIFF | - ... REPORT_ONLY_FIRST_FAILURE) - True - - Only reporting flags can be set: - - >>> doctest.set_unittest_reportflags(ELLIPSIS) - Traceback (most recent call last): - ... - ValueError: ('Only reporting flags allowed', 8) - - >>> doctest.set_unittest_reportflags(old) == (REPORT_NDIFF | - ... 
REPORT_ONLY_FIRST_FAILURE) - True - """ - global _unittest_reportflags - - if (flags & REPORTING_FLAGS) != flags: - raise ValueError("Only reporting flags allowed", flags) - old = _unittest_reportflags - _unittest_reportflags = flags - return old - - -class DocTestCase(unittest.TestCase): - - def __init__(self, test, optionflags=0, setUp=None, tearDown=None, - checker=None): - - unittest.TestCase.__init__(self) - self._dt_optionflags = optionflags - self._dt_checker = checker - self._dt_test = test - self._dt_setUp = setUp - self._dt_tearDown = tearDown - - def setUp(self): - test = self._dt_test - - if self._dt_setUp is not None: - self._dt_setUp(test) - - def tearDown(self): - test = self._dt_test - - if self._dt_tearDown is not None: - self._dt_tearDown(test) - - test.globs.clear() - - def runTest(self): - test = self._dt_test - old = sys.stdout - new = StringIO() - optionflags = self._dt_optionflags - - if not (optionflags & REPORTING_FLAGS): - # The option flags don't include any reporting flags, - # so add the default reporting flags - optionflags |= _unittest_reportflags - - runner = DocTestRunner(optionflags=optionflags, - checker=self._dt_checker, verbose=False) - - try: - runner.DIVIDER = "-"*70 - failures, tries = runner.run( - test, out=new.write, clear_globs=False) - finally: - sys.stdout = old - - if failures: - raise self.failureException(self.format_failure(new.getvalue())) - - def format_failure(self, err): - test = self._dt_test - if test.lineno is None: - lineno = 'unknown line number' - else: - lineno = '%s' % test.lineno - lname = '.'.join(test.name.split('.')[-1:]) - return ('Failed doctest test for %s\n' - ' File "%s", line %s, in %s\n\n%s' - % (test.name, test.filename, lineno, lname, err) - ) - - def debug(self): - r"""Run the test case without results and without catching exceptions - - The unit test framework includes a debug method on test cases - and test suites to support post-mortem debugging. 
The test code - is run in such a way that errors are not caught. This way a - caller can catch the errors and initiate post-mortem debugging. - - The DocTestCase provides a debug method that raises - UnexpectedException errors if there is an unexpected - exception: - - >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42', - ... {}, 'foo', 'foo.py', 0) - >>> case = DocTestCase(test) - >>> try: - ... case.debug() - ... except UnexpectedException, failure: - ... pass - - The UnexpectedException contains the test, the example, and - the original exception: - - >>> failure.test is test - True - - >>> failure.example.want - '42\n' - - >>> exc_info = failure.exc_info - >>> raise exc_info[0], exc_info[1], exc_info[2] - Traceback (most recent call last): - ... - KeyError - - If the output doesn't match, then a DocTestFailure is raised: - - >>> test = DocTestParser().get_doctest(''' - ... >>> x = 1 - ... >>> x - ... 2 - ... ''', {}, 'foo', 'foo.py', 0) - >>> case = DocTestCase(test) - - >>> try: - ... case.debug() - ... except DocTestFailure, failure: - ... 
pass - - DocTestFailure objects provide access to the test: - - >>> failure.test is test - True - - As well as to the example: - - >>> failure.example.want - '2\n' - - and the actual output: - - >>> failure.got - '1\n' - - """ - - self.setUp() - runner = DebugRunner(optionflags=self._dt_optionflags, - checker=self._dt_checker, verbose=False) - runner.run(self._dt_test, clear_globs=False) - self.tearDown() - - def id(self): - return self._dt_test.name - - def __eq__(self, other): - if type(self) is not type(other): - return NotImplemented - - return self._dt_test == other._dt_test and \ - self._dt_optionflags == other._dt_optionflags and \ - self._dt_setUp == other._dt_setUp and \ - self._dt_tearDown == other._dt_tearDown and \ - self._dt_checker == other._dt_checker - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self._dt_optionflags, self._dt_setUp, self._dt_tearDown, - self._dt_checker)) - - def __repr__(self): - name = self._dt_test.name.split('.') - return "%s (%s)" % (name[-1], '.'.join(name[:-1])) - - __str__ = __repr__ - - def shortDescription(self): - return "Doctest: " + self._dt_test.name - -class SkipDocTestCase(DocTestCase): - def __init__(self, module): - self.module = module - DocTestCase.__init__(self, None) - - def setUp(self): - self.skipTest("DocTestSuite will not work with -O2 and above") - - def test_skip(self): - pass - - def shortDescription(self): - return "Skipping tests from %s" % self.module.__name__ - - __str__ = shortDescription - - -def DocTestSuite(module=None, globs=None, extraglobs=None, test_finder=None, - **options): - """ - Convert doctest tests for a module to a unittest test suite. - - This converts each documentation string in a module that - contains doctest tests to a unittest test case. If any of the - tests in a doc string fail, then the test case fails. An exception - is raised showing the name of the file containing the test and a - (sometimes approximate) line number. 
- - The `module` argument provides the module to be tested. The argument - can be either a module or a module name. - - If no argument is given, the calling module is used. - - A number of options may be provided as keyword arguments: - - setUp - A set-up function. This is called before running the - tests in each file. The setUp function will be passed a DocTest - object. The setUp function can access the test globals as the - globs attribute of the test passed. - - tearDown - A tear-down function. This is called after running the - tests in each file. The tearDown function will be passed a DocTest - object. The tearDown function can access the test globals as the - globs attribute of the test passed. - - globs - A dictionary containing initial global variables for the tests. - - optionflags - A set of doctest option flags expressed as an integer. - """ - - if test_finder is None: - test_finder = DocTestFinder() - - module = _normalize_module(module) - tests = test_finder.find(module, globs=globs, extraglobs=extraglobs) - - if not tests and sys.flags.optimize >=2: - # Skip doctests when running with -O2 - suite = unittest.TestSuite() - suite.addTest(SkipDocTestCase(module)) - return suite - elif not tests: - # Why do we want to do this? Because it reveals a bug that might - # otherwise be hidden. - # It is probably a bug that this exception is not also raised if the - # number of doctest examples in tests is zero (i.e. if no doctest - # examples were found). However, we should probably not be raising - # an exception at all here, though it is too late to make this change - # for a maintenance release. See also issue #14649. 
- raise ValueError(module, "has no docstrings") - - tests.sort() - suite = unittest.TestSuite() - - for test in tests: - if len(test.examples) == 0: - continue - if not test.filename: - filename = module.__file__ - if filename[-4:] in (".pyc", ".pyo"): - filename = filename[:-1] - test.filename = filename - suite.addTest(DocTestCase(test, **options)) - - return suite - -class DocFileCase(DocTestCase): - - def id(self): - return '_'.join(self._dt_test.name.split('.')) - - def __repr__(self): - return self._dt_test.filename - __str__ = __repr__ - - def format_failure(self, err): - return ('Failed doctest test for %s\n File "%s", line 0\n\n%s' - % (self._dt_test.name, self._dt_test.filename, err) - ) - -def DocFileTest(path, module_relative=True, package=None, - globs=None, parser=DocTestParser(), - encoding=None, **options): - if globs is None: - globs = {} - else: - globs = globs.copy() - - if package and not module_relative: - raise ValueError("Package may only be specified for module-" - "relative paths.") - - # Relativize the path. - doc, path = _load_testfile(path, package, module_relative) - - if "__file__" not in globs: - globs["__file__"] = path - - # Find the file and read it. - name = os.path.basename(path) - - # If an encoding is specified, use it to convert the file to unicode - if encoding is not None: - doc = doc.decode(encoding) - - # Convert it to a test, and wrap it in a DocFileCase. - test = parser.get_doctest(doc, globs, name, path, 0) - return DocFileCase(test, **options) - -def DocFileSuite(*paths, **kw): - """A unittest suite for one or more doctest files. - - The path to each doctest file is given as a string; the - interpretation of that string depends on the keyword argument - "module_relative". - - A number of options may be provided as keyword arguments: - - module_relative - If "module_relative" is True, then the given file paths are - interpreted as os-independent module-relative paths. 
By - default, these paths are relative to the calling module's - directory; but if the "package" argument is specified, then - they are relative to that package. To ensure os-independence, - "filename" should use "/" characters to separate path - segments, and may not be an absolute path (i.e., it may not - begin with "/"). - - If "module_relative" is False, then the given file paths are - interpreted as os-specific paths. These paths may be absolute - or relative (to the current working directory). - - package - A Python package or the name of a Python package whose directory - should be used as the base directory for module relative paths. - If "package" is not specified, then the calling module's - directory is used as the base directory for module relative - filenames. It is an error to specify "package" if - "module_relative" is False. - - setUp - A set-up function. This is called before running the - tests in each file. The setUp function will be passed a DocTest - object. The setUp function can access the test globals as the - globs attribute of the test passed. - - tearDown - A tear-down function. This is called after running the - tests in each file. The tearDown function will be passed a DocTest - object. The tearDown function can access the test globals as the - globs attribute of the test passed. - - globs - A dictionary containing initial global variables for the tests. - - optionflags - A set of doctest option flags expressed as an integer. - - parser - A DocTestParser (or subclass) that should be used to extract - tests from the files. - - encoding - An encoding that will be used to convert the files to unicode. - """ - suite = unittest.TestSuite() - - # We do this here so that _normalize_module is called at the right - # level. If it were called in DocFileTest, then this function - # would be the caller and we might guess the package incorrectly. 
- if kw.get('module_relative', True): - kw['package'] = _normalize_module(kw.get('package')) - - for path in paths: - suite.addTest(DocFileTest(path, **kw)) - - return suite - -###################################################################### -## 9. Debugging Support -###################################################################### - -def script_from_examples(s): - r"""Extract script from text with examples. - - Converts text with examples to a Python script. Example input is - converted to regular code. Example output and all other words - are converted to comments: - - >>> text = ''' - ... Here are examples of simple math. - ... - ... Python has super accurate integer addition - ... - ... >>> 2 + 2 - ... 5 - ... - ... And very friendly error messages: - ... - ... >>> 1/0 - ... To Infinity - ... And - ... Beyond - ... - ... You can use logic if you want: - ... - ... >>> if 0: - ... ... blah - ... ... blah - ... ... - ... - ... Ho hum - ... ''' - - >>> print script_from_examples(text) - # Here are examples of simple math. - # - # Python has super accurate integer addition - # - 2 + 2 - # Expected: - ## 5 - # - # And very friendly error messages: - # - 1/0 - # Expected: - ## To Infinity - ## And - ## Beyond - # - # You can use logic if you want: - # - if 0: - blah - blah - # - # Ho hum - - """ - output = [] - for piece in DocTestParser().parse(s): - if isinstance(piece, Example): - # Add the example's source code (strip trailing NL) - output.append(piece.source[:-1]) - # Add the expected output: - want = piece.want - if want: - output.append('# Expected:') - output += ['## '+l for l in want.split('\n')[:-1]] - else: - # Add non-example text. - output += [_comment_line(l) - for l in piece.split('\n')[:-1]] - - # Trim junk on both ends. - while output and output[-1] == '#': - output.pop() - while output and output[0] == '#': - output.pop(0) - # Combine the output, and return it. 
- # Add a courtesy newline to prevent exec from choking (see bug #1172785) - return '\n'.join(output) + '\n' - -def testsource(module, name): - """Extract the test sources from a doctest docstring as a script. - - Provide the module (or dotted name of the module) containing the - test to be debugged and the name (within the module) of the object - with the doc string with tests to be debugged. - """ - module = _normalize_module(module) - tests = DocTestFinder().find(module) - test = [t for t in tests if t.name == name] - if not test: - raise ValueError(name, "not found in tests") - test = test[0] - testsrc = script_from_examples(test.docstring) - return testsrc - -def debug_src(src, pm=False, globs=None): - """Debug a single doctest docstring, in argument `src`'""" - testsrc = script_from_examples(src) - debug_script(testsrc, pm, globs) - -def debug_script(src, pm=False, globs=None): - "Debug a test script. `src` is the script, as a string." - import pdb - - # Note that tempfile.NameTemporaryFile() cannot be used. As the - # docs say, a file so created cannot be opened by name a second time - # on modern Windows boxes, and execfile() needs to open it. - srcfilename = tempfile.mktemp(".py", "doctestdebug") - f = open(srcfilename, 'w') - f.write(src) - f.close() - - try: - if globs: - globs = globs.copy() - else: - globs = {} - - if pm: - try: - execfile(srcfilename, globs, globs) - except: - print sys.exc_info()[1] - pdb.post_mortem(sys.exc_info()[2]) - else: - # Note that %r is vital here. '%s' instead can, e.g., cause - # backslashes to get treated as metacharacters on Windows. - pdb.run("execfile(%r)" % srcfilename, globs, globs) - - finally: - os.remove(srcfilename) - -def debug(module, name, pm=False): - """Debug a single doctest docstring. - - Provide the module (or dotted name of the module) containing the - test to be debugged and the name (within the module) of the object - with the docstring with tests to be debugged. 
- """ - module = _normalize_module(module) - testsrc = testsource(module, name) - debug_script(testsrc, pm, module.__dict__) - -###################################################################### -## 10. Example Usage -###################################################################### -class _TestClass: - """ - A pointless class, for sanity-checking of docstring testing. - - Methods: - square() - get() - - >>> _TestClass(13).get() + _TestClass(-12).get() - 1 - >>> hex(_TestClass(13).square().get()) - '0xa9' - """ - - def __init__(self, val): - """val -> _TestClass object with associated value val. - - >>> t = _TestClass(123) - >>> print t.get() - 123 - """ - - self.val = val - - def square(self): - """square() -> square TestClass's associated value - - >>> _TestClass(13).square().get() - 169 - """ - - self.val = self.val ** 2 - return self - - def get(self): - """get() -> return TestClass's associated value. - - >>> x = _TestClass(-42) - >>> print x.get() - -42 - """ - - return self.val - -__test__ = {"_TestClass": _TestClass, - "string": r""" - Example of a string object, searched as-is. - >>> x = 1; y = 2 - >>> x + y, x * y - (3, 2) - """, - - "bool-int equivalence": r""" - In 2.2, boolean expressions displayed - 0 or 1. By default, we still accept - them. This can be disabled by passing - DONT_ACCEPT_TRUE_FOR_1 to the new - optionflags argument. - >>> 4 == 4 - 1 - >>> 4 == 4 - True - >>> 4 > 4 - 0 - >>> 4 > 4 - False - """, - - "blank lines": r""" - Blank lines can be marked with : - >>> print 'foo\n\nbar\n' - foo - - bar - - """, - - "ellipsis": r""" - If the ellipsis flag is used, then '...' can be used to - elide substrings in the desired output: - >>> print range(1000) #doctest: +ELLIPSIS - [0, 1, 2, ..., 999] - """, - - "whitespace normalization": r""" - If the whitespace normalization flag is used, then - differences in whitespace are ignored. 
- >>> print range(30) #doctest: +NORMALIZE_WHITESPACE - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29] - """, - } - - -def _test(): - testfiles = [arg for arg in sys.argv[1:] if arg and arg[0] != '-'] - if not testfiles: - name = os.path.basename(sys.argv[0]) - if '__loader__' in globals(): # python -m - name, _ = os.path.splitext(name) - print("usage: {0} [-v] file ...".format(name)) - return 2 - for filename in testfiles: - if filename.endswith(".py"): - # It is a module -- insert its dir into sys.path and try to - # import it. If it is part of a package, that possibly - # won't work because of package imports. - dirname, filename = os.path.split(filename) - sys.path.insert(0, dirname) - m = __import__(filename[:-3]) - del sys.path[0] - failures, _ = testmod(m) - else: - failures, _ = testfile(filename, module_relative=False) - if failures: - return 1 - return 0 - - -if __name__ == "__main__": - sys.exit(_test()) diff --git a/python/Lib/dumbdbm.py b/python/Lib/dumbdbm.py deleted file mode 100755 index 7c9c441285..0000000000 --- a/python/Lib/dumbdbm.py +++ /dev/null @@ -1,253 +0,0 @@ -"""A dumb and slow but simple dbm clone. - -For database spam, spam.dir contains the index (a text file), -spam.bak *may* contain a backup of the index (also a text file), -while spam.dat contains the data (a binary file). - -XXX TO DO: - -- seems to contain a bug when updating... 
- -- reclaim free space (currently, space once occupied by deleted or expanded -items is never reused) - -- support concurrent access (currently, if two processes take turns making -updates, they can mess up the index) - -- support efficient access to large databases (currently, the whole index -is read when the database is opened, and some updates rewrite the whole index) - -- support opening for read-only (flag = 'm') - -""" - -import ast as _ast -import os as _os -import __builtin__ -import UserDict - -_open = __builtin__.open - -_BLOCKSIZE = 512 - -error = IOError # For anydbm - -class _Database(UserDict.DictMixin): - - # The on-disk directory and data files can remain in mutually - # inconsistent states for an arbitrarily long time (see comments - # at the end of __setitem__). This is only repaired when _commit() - # gets called. One place _commit() gets called is from __del__(), - # and if that occurs at program shutdown time, module globals may - # already have gotten rebound to None. Since it's crucial that - # _commit() finish successfully, we can't ignore shutdown races - # here, and _commit() must not reference any globals. - _os = _os # for _commit() - _open = _open # for _commit() - - def __init__(self, filebasename, mode, flag='c'): - self._mode = mode - self._readonly = (flag == 'r') - - # The directory file is a text file. Each line looks like - # "%r, (%d, %d)\n" % (key, pos, siz) - # where key is the string key, pos is the offset into the dat - # file of the associated value's first byte, and siz is the number - # of bytes in the associated value. - self._dirfile = filebasename + _os.extsep + 'dir' - - # The data file is a binary file pointed into by the directory - # file, and holds the values associated with keys. Each value - # begins at a _BLOCKSIZE-aligned byte offset, and is a raw - # binary 8-bit string value. 
- self._datfile = filebasename + _os.extsep + 'dat' - self._bakfile = filebasename + _os.extsep + 'bak' - - # The index is an in-memory dict, mirroring the directory file. - self._index = None # maps keys to (pos, siz) pairs - - # Mod by Jack: create data file if needed - try: - f = _open(self._datfile, 'r') - except IOError: - with _open(self._datfile, 'w') as f: - self._chmod(self._datfile) - else: - f.close() - self._update() - - # Read directory file into the in-memory index dict. - def _update(self): - self._index = {} - try: - f = _open(self._dirfile) - except IOError: - self._modified = not self._readonly - else: - self._modified = False - with f: - for line in f: - line = line.rstrip() - key, pos_and_siz_pair = _ast.literal_eval(line) - self._index[key] = pos_and_siz_pair - - # Write the index dict to the directory file. The original directory - # file (if any) is renamed with a .bak extension first. If a .bak - # file currently exists, it's deleted. - def _commit(self): - # CAUTION: It's vital that _commit() succeed, and _commit() can - # be called from __del__(). Therefore we must never reference a - # global in this routine. - if self._index is None or not self._modified: - return # nothing to do - - try: - self._os.unlink(self._bakfile) - except self._os.error: - pass - - try: - self._os.rename(self._dirfile, self._bakfile) - except self._os.error: - pass - - with self._open(self._dirfile, 'w') as f: - self._chmod(self._dirfile) - for key, pos_and_siz_pair in self._index.iteritems(): - f.write("%r, %r\n" % (key, pos_and_siz_pair)) - - sync = _commit - - def __getitem__(self, key): - pos, siz = self._index[key] # may raise KeyError - with _open(self._datfile, 'rb') as f: - f.seek(pos) - dat = f.read(siz) - return dat - - # Append val to the data file, starting at a _BLOCKSIZE-aligned - # offset. The data file is first padded with NUL bytes (if needed) - # to get to an aligned offset. 
Return pair - # (starting offset of val, len(val)) - def _addval(self, val): - with _open(self._datfile, 'rb+') as f: - f.seek(0, 2) - pos = int(f.tell()) - npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE - f.write('\0'*(npos-pos)) - pos = npos - f.write(val) - return (pos, len(val)) - - # Write val to the data file, starting at offset pos. The caller - # is responsible for ensuring that there's enough room starting at - # pos to hold val, without overwriting some other value. Return - # pair (pos, len(val)). - def _setval(self, pos, val): - with _open(self._datfile, 'rb+') as f: - f.seek(pos) - f.write(val) - return (pos, len(val)) - - # key is a new key whose associated value starts in the data file - # at offset pos and with length siz. Add an index record to - # the in-memory index dict, and append one to the directory file. - def _addkey(self, key, pos_and_siz_pair): - self._index[key] = pos_and_siz_pair - with _open(self._dirfile, 'a') as f: - self._chmod(self._dirfile) - f.write("%r, %r\n" % (key, pos_and_siz_pair)) - - def __setitem__(self, key, val): - if not type(key) == type('') == type(val): - raise TypeError, "keys and values must be strings" - self._modified = True - if key not in self._index: - self._addkey(key, self._addval(val)) - else: - # See whether the new value is small enough to fit in the - # (padded) space currently occupied by the old value. - pos, siz = self._index[key] - oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE - newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE - if newblocks <= oldblocks: - self._index[key] = self._setval(pos, val) - else: - # The new value doesn't fit in the (padded) space used - # by the old value. The blocks used by the old value are - # forever lost. - self._index[key] = self._addval(val) - - # Note that _index may be out of synch with the directory - # file now: _setval() and _addval() don't update the directory - # file. 
This also means that the on-disk directory and data - # files are in a mutually inconsistent state, and they'll - # remain that way until _commit() is called. Note that this - # is a disaster (for the database) if the program crashes - # (so that _commit() never gets called). - - def __delitem__(self, key): - self._modified = True - # The blocks used by the associated value are lost. - del self._index[key] - # XXX It's unclear why we do a _commit() here (the code always - # XXX has, so I'm not changing it). _setitem__ doesn't try to - # XXX keep the directory file in synch. Why should we? Or - # XXX why shouldn't __setitem__? - self._commit() - - def keys(self): - return self._index.keys() - - def has_key(self, key): - return key in self._index - - def __contains__(self, key): - return key in self._index - - def iterkeys(self): - return self._index.iterkeys() - __iter__ = iterkeys - - def __len__(self): - return len(self._index) - - def close(self): - try: - self._commit() - finally: - self._index = self._datfile = self._dirfile = self._bakfile = None - - __del__ = close - - def _chmod (self, file): - if hasattr(self._os, 'chmod'): - self._os.chmod(file, self._mode) - - -def open(file, flag=None, mode=0666): - """Open the database file, filename, and return corresponding object. - - The flag argument, used to control how the database is opened in the - other DBM implementations, is ignored in the dumbdbm module; the - database is always opened for update, and will be created if it does - not exist. - - The optional mode argument is the UNIX mode of the file, used only when - the database has to be created. It defaults to octal code 0666 (and - will be modified by the prevailing umask). 
- - """ - # flag argument is currently ignored - - # Modify mode depending on the umask - try: - um = _os.umask(0) - _os.umask(um) - except AttributeError: - pass - else: - # Turn off any bits that are set in the umask - mode = mode & (~um) - - return _Database(file, mode, flag) diff --git a/python/Lib/dummy_thread.py b/python/Lib/dummy_thread.py deleted file mode 100755 index 198dc49dba..0000000000 --- a/python/Lib/dummy_thread.py +++ /dev/null @@ -1,145 +0,0 @@ -"""Drop-in replacement for the thread module. - -Meant to be used as a brain-dead substitute so that threaded code does -not need to be rewritten for when the thread module is not present. - -Suggested usage is:: - - try: - import thread - except ImportError: - import dummy_thread as thread - -""" -# Exports only things specified by thread documentation; -# skipping obsolete synonyms allocate(), start_new(), exit_thread(). -__all__ = ['error', 'start_new_thread', 'exit', 'get_ident', 'allocate_lock', - 'interrupt_main', 'LockType'] - -import traceback as _traceback - -class error(Exception): - """Dummy implementation of thread.error.""" - - def __init__(self, *args): - self.args = args - -def start_new_thread(function, args, kwargs={}): - """Dummy implementation of thread.start_new_thread(). - - Compatibility is maintained by making sure that ``args`` is a - tuple and ``kwargs`` is a dictionary. If an exception is raised - and it is SystemExit (which can be done by thread.exit()) it is - caught and nothing is done; all other exceptions are printed out - by using traceback.print_exc(). - - If the executed function calls interrupt_main the KeyboardInterrupt will be - raised when the function returns. 
- - """ - if type(args) != type(tuple()): - raise TypeError("2nd arg must be a tuple") - if type(kwargs) != type(dict()): - raise TypeError("3rd arg must be a dict") - global _main - _main = False - try: - function(*args, **kwargs) - except SystemExit: - pass - except: - _traceback.print_exc() - _main = True - global _interrupt - if _interrupt: - _interrupt = False - raise KeyboardInterrupt - -def exit(): - """Dummy implementation of thread.exit().""" - raise SystemExit - -def get_ident(): - """Dummy implementation of thread.get_ident(). - - Since this module should only be used when threadmodule is not - available, it is safe to assume that the current process is the - only thread. Thus a constant can be safely returned. - """ - return -1 - -def allocate_lock(): - """Dummy implementation of thread.allocate_lock().""" - return LockType() - -def stack_size(size=None): - """Dummy implementation of thread.stack_size().""" - if size is not None: - raise error("setting thread stack size not supported") - return 0 - -class LockType(object): - """Class implementing dummy implementation of thread.LockType. - - Compatibility is maintained by maintaining self.locked_status - which is a boolean that stores the state of the lock. Pickling of - the lock, though, should not be done since if the thread module is - then used with an unpickled ``lock()`` from here problems could - occur from this class not having atomic methods. - - """ - - def __init__(self): - self.locked_status = False - - def acquire(self, waitflag=None): - """Dummy implementation of acquire(). - - For blocking calls, self.locked_status is automatically set to - True and returned appropriately based on value of - ``waitflag``. If it is non-blocking, then the value is - actually checked and not set if it is already acquired. This - is all done so that threading.Condition's assert statements - aren't triggered and throw a little fit. 
- - """ - if waitflag is None or waitflag: - self.locked_status = True - return True - else: - if not self.locked_status: - self.locked_status = True - return True - else: - return False - - __enter__ = acquire - - def __exit__(self, typ, val, tb): - self.release() - - def release(self): - """Release the dummy lock.""" - # XXX Perhaps shouldn't actually bother to test? Could lead - # to problems for complex, threaded code. - if not self.locked_status: - raise error - self.locked_status = False - return True - - def locked(self): - return self.locked_status - -# Used to signal that interrupt_main was called in a "thread" -_interrupt = False -# True when not executing in a "thread" -_main = True - -def interrupt_main(): - """Set _interrupt flag to True to have start_new_thread raise - KeyboardInterrupt upon exiting.""" - if _main: - raise KeyboardInterrupt - else: - global _interrupt - _interrupt = True diff --git a/python/Lib/dummy_threading.py b/python/Lib/dummy_threading.py deleted file mode 100755 index 81028a3d4e..0000000000 --- a/python/Lib/dummy_threading.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Faux ``threading`` version using ``dummy_thread`` instead of ``thread``. - -The module ``_dummy_threading`` is added to ``sys.modules`` in order -to not have ``threading`` considered imported. Had ``threading`` been -directly imported it would have made all subsequent imports succeed -regardless of whether ``thread`` was available which is not desired. - -""" -from sys import modules as sys_modules - -import dummy_thread - -# Declaring now so as to not have to nest ``try``s to get proper clean-up. -holding_thread = False -holding_threading = False -holding__threading_local = False - -try: - # Could have checked if ``thread`` was not in sys.modules and gone - # a different route, but decided to mirror technique used with - # ``threading`` below. 
- if 'thread' in sys_modules: - held_thread = sys_modules['thread'] - holding_thread = True - # Must have some module named ``thread`` that implements its API - # in order to initially import ``threading``. - sys_modules['thread'] = sys_modules['dummy_thread'] - - if 'threading' in sys_modules: - # If ``threading`` is already imported, might as well prevent - # trying to import it more than needed by saving it if it is - # already imported before deleting it. - held_threading = sys_modules['threading'] - holding_threading = True - del sys_modules['threading'] - - if '_threading_local' in sys_modules: - # If ``_threading_local`` is already imported, might as well prevent - # trying to import it more than needed by saving it if it is - # already imported before deleting it. - held__threading_local = sys_modules['_threading_local'] - holding__threading_local = True - del sys_modules['_threading_local'] - - import threading - # Need a copy of the code kept somewhere... - sys_modules['_dummy_threading'] = sys_modules['threading'] - del sys_modules['threading'] - sys_modules['_dummy__threading_local'] = sys_modules['_threading_local'] - del sys_modules['_threading_local'] - from _dummy_threading import * - from _dummy_threading import __all__ - -finally: - # Put back ``threading`` if we overwrote earlier - - if holding_threading: - sys_modules['threading'] = held_threading - del held_threading - del holding_threading - - # Put back ``_threading_local`` if we overwrote earlier - - if holding__threading_local: - sys_modules['_threading_local'] = held__threading_local - del held__threading_local - del holding__threading_local - - # Put back ``thread`` if we overwrote, else del the entry we made - if holding_thread: - sys_modules['thread'] = held_thread - del held_thread - else: - del sys_modules['thread'] - del holding_thread - - del dummy_thread - del sys_modules diff --git a/python/Lib/email/__init__.py b/python/Lib/email/__init__.py deleted file mode 100755 index 
a780ebe339..0000000000 --- a/python/Lib/email/__init__.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""A package for parsing, handling, and generating email messages.""" - -__version__ = '4.0.3' - -__all__ = [ - # Old names - 'base64MIME', - 'Charset', - 'Encoders', - 'Errors', - 'Generator', - 'Header', - 'Iterators', - 'Message', - 'MIMEAudio', - 'MIMEBase', - 'MIMEImage', - 'MIMEMessage', - 'MIMEMultipart', - 'MIMENonMultipart', - 'MIMEText', - 'Parser', - 'quopriMIME', - 'Utils', - 'message_from_string', - 'message_from_file', - # new names - 'base64mime', - 'charset', - 'encoders', - 'errors', - 'generator', - 'header', - 'iterators', - 'message', - 'mime', - 'parser', - 'quoprimime', - 'utils', - ] - - - -# Some convenience routines. Don't import Parser and Message as side-effects -# of importing email since those cascadingly import most of the rest of the -# email package. -def message_from_string(s, *args, **kws): - """Parse a string into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from email.parser import Parser - return Parser(*args, **kws).parsestr(s) - - -def message_from_file(fp, *args, **kws): - """Read a file and parse its contents into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from email.parser import Parser - return Parser(*args, **kws).parse(fp) - - - -# Lazy loading to provide name mapping from new-style names (PEP 8 compatible -# email 4.0 module names), to old-style names (email 3.0 module names). -import sys - -class LazyImporter(object): - def __init__(self, module_name): - self.__name__ = 'email.' + module_name - - def __getattr__(self, name): - __import__(self.__name__) - mod = sys.modules[self.__name__] - self.__dict__.update(mod.__dict__) - return getattr(mod, name) - - -_LOWERNAMES = [ - # email. -> email. 
- 'Charset', - 'Encoders', - 'Errors', - 'FeedParser', - 'Generator', - 'Header', - 'Iterators', - 'Message', - 'Parser', - 'Utils', - 'base64MIME', - 'quopriMIME', - ] - -_MIMENAMES = [ - # email.MIME -> email.mime. - 'Audio', - 'Base', - 'Image', - 'Message', - 'Multipart', - 'NonMultipart', - 'Text', - ] - -for _name in _LOWERNAMES: - importer = LazyImporter(_name.lower()) - sys.modules['email.' + _name] = importer - setattr(sys.modules['email'], _name, importer) - - -import email.mime -for _name in _MIMENAMES: - importer = LazyImporter('mime.' + _name.lower()) - sys.modules['email.MIME' + _name] = importer - setattr(sys.modules['email'], 'MIME' + _name, importer) - setattr(sys.modules['email.mime'], _name, importer) diff --git a/python/Lib/email/_parseaddr.py b/python/Lib/email/_parseaddr.py deleted file mode 100755 index 690db2c22d..0000000000 --- a/python/Lib/email/_parseaddr.py +++ /dev/null @@ -1,497 +0,0 @@ -# Copyright (C) 2002-2007 Python Software Foundation -# Contact: email-sig@python.org - -"""Email address parsing code. - -Lifted directly from rfc822.py. This should eventually be rewritten. -""" - -__all__ = [ - 'mktime_tz', - 'parsedate', - 'parsedate_tz', - 'quote', - ] - -import time, calendar - -SPACE = ' ' -EMPTYSTRING = '' -COMMASPACE = ', ' - -# Parse a date field -_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', - 'aug', 'sep', 'oct', 'nov', 'dec', - 'january', 'february', 'march', 'april', 'may', 'june', 'july', - 'august', 'september', 'october', 'november', 'december'] - -_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - -# The timezone table does not include the military time zones defined -# in RFC822, other than Z. According to RFC1123, the description in -# RFC822 gets the signs wrong, so we can't rely on any such time -# zones. RFC1123 recommends that numeric timezone indicators be used -# instead of timezone names. 
- -_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, - 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) - 'EST': -500, 'EDT': -400, # Eastern - 'CST': -600, 'CDT': -500, # Central - 'MST': -700, 'MDT': -600, # Mountain - 'PST': -800, 'PDT': -700 # Pacific - } - - -def parsedate_tz(data): - """Convert a date string to a time tuple. - - Accounts for military timezones. - """ - data = data.split() - # The FWS after the comma after the day-of-week is optional, so search and - # adjust for this. - if data[0].endswith(',') or data[0].lower() in _daynames: - # There's a dayname here. Skip it - del data[0] - else: - i = data[0].rfind(',') - if i >= 0: - data[0] = data[0][i+1:] - if len(data) == 3: # RFC 850 date, deprecated - stuff = data[0].split('-') - if len(stuff) == 3: - data = stuff + data[1:] - if len(data) == 4: - s = data[3] - i = s.find('+') - if i > 0: - data[3:] = [s[:i], s[i+1:]] - else: - data.append('') # Dummy tz - if len(data) < 5: - return None - data = data[:5] - [dd, mm, yy, tm, tz] = data - mm = mm.lower() - if mm not in _monthnames: - dd, mm = mm, dd.lower() - if mm not in _monthnames: - return None - mm = _monthnames.index(mm) + 1 - if mm > 12: - mm -= 12 - if dd[-1] == ',': - dd = dd[:-1] - i = yy.find(':') - if i > 0: - yy, tm = tm, yy - if yy[-1] == ',': - yy = yy[:-1] - if not yy[0].isdigit(): - yy, tz = tz, yy - if tm[-1] == ',': - tm = tm[:-1] - tm = tm.split(':') - if len(tm) == 2: - [thh, tmm] = tm - tss = '0' - elif len(tm) == 3: - [thh, tmm, tss] = tm - else: - return None - try: - yy = int(yy) - dd = int(dd) - thh = int(thh) - tmm = int(tmm) - tss = int(tss) - except ValueError: - return None - # Check for a yy specified in two-digit format, then convert it to the - # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for - # the time module. 
- if yy < 100: - # The year is between 1969 and 1999 (inclusive). - if yy > 68: - yy += 1900 - # The year is between 2000 and 2068 (inclusive). - else: - yy += 2000 - tzoffset = None - tz = tz.upper() - if tz in _timezones: - tzoffset = _timezones[tz] - else: - try: - tzoffset = int(tz) - except ValueError: - pass - # Convert a timezone offset into seconds ; -0500 -> -18000 - if tzoffset: - if tzoffset < 0: - tzsign = -1 - tzoffset = -tzoffset - else: - tzsign = 1 - tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) - # Daylight Saving Time flag is set to -1, since DST is unknown. - return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset - - -def parsedate(data): - """Convert a time string to a time tuple.""" - t = parsedate_tz(data) - if isinstance(t, tuple): - return t[:9] - else: - return t - - -def mktime_tz(data): - """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.""" - if data[9] is None: - # No zone info, so localtime is better assumption than GMT - return time.mktime(data[:8] + (-1,)) - else: - t = calendar.timegm(data) - return t - data[9] - - -def quote(str): - """Prepare string to be used in a quoted string. - - Turns backslash and double quote characters into quoted pairs. These - are the only characters that need to be quoted inside a quoted string. - Does not add the surrounding double quotes. - """ - return str.replace('\\', '\\\\').replace('"', '\\"') - - -class AddrlistClass: - """Address parser class by Ben Escoto. - - To understand what this class does, it helps to have a copy of RFC 2822 in - front of you. - - Note: this class interface is deprecated and may be removed in the future. - Use rfc822.AddressList instead. - """ - - def __init__(self, field): - """Initialize a new instance. - - `field' is an unparsed address header field, containing - one or more addresses. 
- """ - self.specials = '()<>@,:;.\"[]' - self.pos = 0 - self.LWS = ' \t' - self.CR = '\r\n' - self.FWS = self.LWS + self.CR - self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. - self.phraseends = self.atomends.replace('.', '') - self.field = field - self.commentlist = [] - - def gotonext(self): - """Parse up to the start of the next address.""" - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS + '\n\r': - self.pos += 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - else: - break - - def getaddrlist(self): - """Parse all addresses. - - Returns a list containing all of the addresses. - """ - result = [] - while self.pos < len(self.field): - ad = self.getaddress() - if ad: - result += ad - else: - result.append(('', '')) - return result - - def getaddress(self): - """Parse the next address.""" - self.commentlist = [] - self.gotonext() - - oldpos = self.pos - oldcl = self.commentlist - plist = self.getphraselist() - - self.gotonext() - returnlist = [] - - if self.pos >= len(self.field): - # Bad email address technically, no domain. 
- if plist: - returnlist = [(SPACE.join(self.commentlist), plist[0])] - - elif self.field[self.pos] in '.@': - # email address is just an addrspec - # this isn't very efficient since we start over - self.pos = oldpos - self.commentlist = oldcl - addrspec = self.getaddrspec() - returnlist = [(SPACE.join(self.commentlist), addrspec)] - - elif self.field[self.pos] == ':': - # address is a group - returnlist = [] - - fieldlen = len(self.field) - self.pos += 1 - while self.pos < len(self.field): - self.gotonext() - if self.pos < fieldlen and self.field[self.pos] == ';': - self.pos += 1 - break - returnlist = returnlist + self.getaddress() - - elif self.field[self.pos] == '<': - # Address is a phrase then a route addr - routeaddr = self.getrouteaddr() - - if self.commentlist: - returnlist = [(SPACE.join(plist) + ' (' + - ' '.join(self.commentlist) + ')', routeaddr)] - else: - returnlist = [(SPACE.join(plist), routeaddr)] - - else: - if plist: - returnlist = [(SPACE.join(self.commentlist), plist[0])] - elif self.field[self.pos] in self.specials: - self.pos += 1 - - self.gotonext() - if self.pos < len(self.field) and self.field[self.pos] == ',': - self.pos += 1 - return returnlist - - def getrouteaddr(self): - """Parse a route address (Return-path value). - - This method just skips all the route stuff and returns the addrspec. 
- """ - if self.field[self.pos] != '<': - return - - expectroute = False - self.pos += 1 - self.gotonext() - adlist = '' - while self.pos < len(self.field): - if expectroute: - self.getdomain() - expectroute = False - elif self.field[self.pos] == '>': - self.pos += 1 - break - elif self.field[self.pos] == '@': - self.pos += 1 - expectroute = True - elif self.field[self.pos] == ':': - self.pos += 1 - else: - adlist = self.getaddrspec() - self.pos += 1 - break - self.gotonext() - - return adlist - - def getaddrspec(self): - """Parse an RFC 2822 addr-spec.""" - aslist = [] - - self.gotonext() - while self.pos < len(self.field): - if self.field[self.pos] == '.': - aslist.append('.') - self.pos += 1 - elif self.field[self.pos] == '"': - aslist.append('"%s"' % quote(self.getquote())) - elif self.field[self.pos] in self.atomends: - break - else: - aslist.append(self.getatom()) - self.gotonext() - - if self.pos >= len(self.field) or self.field[self.pos] != '@': - return EMPTYSTRING.join(aslist) - - aslist.append('@') - self.pos += 1 - self.gotonext() - return EMPTYSTRING.join(aslist) + self.getdomain() - - def getdomain(self): - """Get the complete domain name from an address.""" - sdlist = [] - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS: - self.pos += 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] == '[': - sdlist.append(self.getdomainliteral()) - elif self.field[self.pos] == '.': - self.pos += 1 - sdlist.append('.') - elif self.field[self.pos] in self.atomends: - break - else: - sdlist.append(self.getatom()) - return EMPTYSTRING.join(sdlist) - - def getdelimited(self, beginchar, endchars, allowcomments=True): - """Parse a header fragment delimited by special characters. - - `beginchar' is the start character for the fragment. - If self is not looking at an instance of `beginchar' then - getdelimited returns the empty string. 
- - `endchars' is a sequence of allowable end-delimiting characters. - Parsing stops when one of these is encountered. - - If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed - within the parsed fragment. - """ - if self.field[self.pos] != beginchar: - return '' - - slist = [''] - quote = False - self.pos += 1 - while self.pos < len(self.field): - if quote: - slist.append(self.field[self.pos]) - quote = False - elif self.field[self.pos] in endchars: - self.pos += 1 - break - elif allowcomments and self.field[self.pos] == '(': - slist.append(self.getcomment()) - continue # have already advanced pos from getcomment - elif self.field[self.pos] == '\\': - quote = True - else: - slist.append(self.field[self.pos]) - self.pos += 1 - - return EMPTYSTRING.join(slist) - - def getquote(self): - """Get a quote-delimited fragment from self's field.""" - return self.getdelimited('"', '"\r', False) - - def getcomment(self): - """Get a parenthesis-delimited fragment from self's field.""" - return self.getdelimited('(', ')\r', True) - - def getdomainliteral(self): - """Parse an RFC 2822 domain-literal.""" - return '[%s]' % self.getdelimited('[', ']\r', False) - - def getatom(self, atomends=None): - """Parse an RFC 2822 atom. - - Optional atomends specifies a different set of end token delimiters - (the default is to use self.atomends). This is used e.g. in - getphraselist() since phrase endings must not include the `.' (which - is legal in phrases).""" - atomlist = [''] - if atomends is None: - atomends = self.atomends - - while self.pos < len(self.field): - if self.field[self.pos] in atomends: - break - else: - atomlist.append(self.field[self.pos]) - self.pos += 1 - - return EMPTYSTRING.join(atomlist) - - def getphraselist(self): - """Parse a sequence of RFC 2822 phrases. - - A phrase is a sequence of words, which are in turn either RFC 2822 - atoms or quoted-strings. Phrases are canonicalized by squeezing all - runs of continuous whitespace into one space. 
- """ - plist = [] - - while self.pos < len(self.field): - if self.field[self.pos] in self.FWS: - self.pos += 1 - elif self.field[self.pos] == '"': - plist.append(self.getquote()) - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] in self.phraseends: - break - else: - plist.append(self.getatom(self.phraseends)) - - return plist - -class AddressList(AddrlistClass): - """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" - def __init__(self, field): - AddrlistClass.__init__(self, field) - if field: - self.addresslist = self.getaddrlist() - else: - self.addresslist = [] - - def __len__(self): - return len(self.addresslist) - - def __add__(self, other): - # Set union - newaddr = AddressList(None) - newaddr.addresslist = self.addresslist[:] - for x in other.addresslist: - if not x in self.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __iadd__(self, other): - # Set union, in-place - for x in other.addresslist: - if not x in self.addresslist: - self.addresslist.append(x) - return self - - def __sub__(self, other): - # Set difference - newaddr = AddressList(None) - for x in self.addresslist: - if not x in other.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __isub__(self, other): - # Set difference, in-place - for x in other.addresslist: - if x in self.addresslist: - self.addresslist.remove(x) - return self - - def __getitem__(self, index): - # Make indexing, slices, and 'in' work - return self.addresslist[index] diff --git a/python/Lib/email/base64mime.py b/python/Lib/email/base64mime.py deleted file mode 100755 index dd6b686656..0000000000 --- a/python/Lib/email/base64mime.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Ben Gertzfield -# Contact: email-sig@python.org - -"""Base64 content transfer encoding per RFCs 2045-2047. 
- -This module handles the content transfer encoding method defined in RFC 2045 -to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit -characters encoding known as Base64. - -It is used in the MIME standards for email to attach images, audio, and text -using some 8-bit character sets to messages. - -This module provides an interface to encode and decode both headers and bodies -with Base64 encoding. - -RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names -in To:, From:, Cc:, etc. fields, as well as Subject: lines. - -This module does not do the line wrapping or end-of-line character conversion -necessary for proper internationalized headers; it only does dumb encoding and -decoding. To deal with the various line wrapping issues, use the email.header -module. -""" - -__all__ = [ - 'base64_len', - 'body_decode', - 'body_encode', - 'decode', - 'decodestring', - 'encode', - 'encodestring', - 'header_encode', - ] - - -from binascii import b2a_base64, a2b_base64 -from email.utils import fix_eols - -CRLF = '\r\n' -NL = '\n' -EMPTYSTRING = '' - -# See also Charset.py -MISC_LEN = 7 - - - -# Helpers -def base64_len(s): - """Return the length of s when it is encoded with base64.""" - groups_of_3, leftover = divmod(len(s), 3) - # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. - # Thanks, Tim! - n = groups_of_3 * 4 - if leftover: - n += 4 - return n - - - -def header_encode(header, charset='iso-8859-1', keep_eols=False, - maxlinelen=76, eol=NL): - """Encode a single header line with Base64 encoding in a given charset. - - Defined in RFC 2045, this Base64 encoding is identical to normal Base64 - encoding, except that each line must be intelligently wrapped (respecting - the Base64 encoding), and subsequent lines must start with a space. - - charset names the character set to use to encode the header. It defaults - to iso-8859-1. 
- - End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted - to the canonical email line separator \\r\\n unless the keep_eols - parameter is True (the default is False). - - Each line of the header will be terminated in the value of eol, which - defaults to "\\n". Set this to "\\r\\n" if you are using the result of - this function directly in email. - - The resulting string will be in the form: - - "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n - =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?=" - - with each line wrapped at, at most, maxlinelen characters (defaults to 76 - characters). - """ - # Return empty headers unchanged - if not header: - return header - - if not keep_eols: - header = fix_eols(header) - - # Base64 encode each line, in encoded chunks no greater than maxlinelen in - # length, after the RFC chrome is added in. - base64ed = [] - max_encoded = maxlinelen - len(charset) - MISC_LEN - max_unencoded = max_encoded * 3 // 4 - - for i in range(0, len(header), max_unencoded): - base64ed.append(b2a_base64(header[i:i+max_unencoded])) - - # Now add the RFC chrome to each encoded chunk - lines = [] - for line in base64ed: - # Ignore the last character of each line if it is a newline - if line.endswith(NL): - line = line[:-1] - # Add the chrome - lines.append('=?%s?b?%s?=' % (charset, line)) - # Glue the lines together and return it. BAW: should we be able to - # specify the leading whitespace in the joiner? - joiner = eol + ' ' - return joiner.join(lines) - - - -def encode(s, binary=True, maxlinelen=76, eol=NL): - """Encode a string with base64. - - Each line will be wrapped at, at most, maxlinelen characters (defaults to - 76 characters). - - If binary is False, end-of-line characters will be converted to the - canonical email end-of-line sequence \\r\\n. Otherwise they will be left - verbatim (this is the default). - - Each line of encoded text will end with eol, which defaults to "\\n". 
Set - this to "\\r\\n" if you will be using the result of this function directly - in an email. - """ - if not s: - return s - - if not binary: - s = fix_eols(s) - - encvec = [] - max_unencoded = maxlinelen * 3 // 4 - for i in range(0, len(s), max_unencoded): - # BAW: should encode() inherit b2a_base64()'s dubious behavior in - # adding a newline to the encoded string? - enc = b2a_base64(s[i:i + max_unencoded]) - if enc.endswith(NL) and eol != NL: - enc = enc[:-1] + eol - encvec.append(enc) - return EMPTYSTRING.join(encvec) - - -# For convenience and backwards compatibility w/ standard base64 module -body_encode = encode -encodestring = encode - - - -def decode(s, convert_eols=None): - """Decode a raw base64 string. - - If convert_eols is set to a string value, all canonical email linefeeds, - e.g. "\\r\\n", in the decoded text will be converted to the value of - convert_eols. os.linesep is a good choice for convert_eols if you are - decoding a text attachment. - - This function does not parse a full MIME header value encoded with - base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high - level email.header class for that functionality. 
- """ - if not s: - return s - - dec = a2b_base64(s) - if convert_eols: - return dec.replace(CRLF, convert_eols) - return dec - - -# For convenience and backwards compatibility w/ standard base64 module -body_decode = decode -decodestring = decode diff --git a/python/Lib/email/charset.py b/python/Lib/email/charset.py deleted file mode 100755 index 30a13ff249..0000000000 --- a/python/Lib/email/charset.py +++ /dev/null @@ -1,397 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Ben Gertzfield, Barry Warsaw -# Contact: email-sig@python.org - -__all__ = [ - 'Charset', - 'add_alias', - 'add_charset', - 'add_codec', - ] - -import codecs -import email.base64mime -import email.quoprimime - -from email import errors -from email.encoders import encode_7or8bit - - - -# Flags for types of header encodings -QP = 1 # Quoted-Printable -BASE64 = 2 # Base64 -SHORTEST = 3 # the shorter of QP and base64, but only for headers - -# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 -MISC_LEN = 7 - -DEFAULT_CHARSET = 'us-ascii' - - - -# Defaults -CHARSETS = { - # input header enc body enc output conv - 'iso-8859-1': (QP, QP, None), - 'iso-8859-2': (QP, QP, None), - 'iso-8859-3': (QP, QP, None), - 'iso-8859-4': (QP, QP, None), - # iso-8859-5 is Cyrillic, and not especially used - # iso-8859-6 is Arabic, also not particularly used - # iso-8859-7 is Greek, QP will not make it readable - # iso-8859-8 is Hebrew, QP will not make it readable - 'iso-8859-9': (QP, QP, None), - 'iso-8859-10': (QP, QP, None), - # iso-8859-11 is Thai, QP will not make it readable - 'iso-8859-13': (QP, QP, None), - 'iso-8859-14': (QP, QP, None), - 'iso-8859-15': (QP, QP, None), - 'iso-8859-16': (QP, QP, None), - 'windows-1252':(QP, QP, None), - 'viscii': (QP, QP, None), - 'us-ascii': (None, None, None), - 'big5': (BASE64, BASE64, None), - 'gb2312': (BASE64, BASE64, None), - 'euc-jp': (BASE64, None, 'iso-2022-jp'), - 'shift_jis': (BASE64, None, 'iso-2022-jp'), - 'iso-2022-jp': 
(BASE64, None, None), - 'koi8-r': (BASE64, BASE64, None), - 'utf-8': (SHORTEST, BASE64, 'utf-8'), - # We're making this one up to represent raw unencoded 8-bit - '8bit': (None, BASE64, 'utf-8'), - } - -# Aliases for other commonly-used names for character sets. Map -# them to the real ones used in email. -ALIASES = { - 'latin_1': 'iso-8859-1', - 'latin-1': 'iso-8859-1', - 'latin_2': 'iso-8859-2', - 'latin-2': 'iso-8859-2', - 'latin_3': 'iso-8859-3', - 'latin-3': 'iso-8859-3', - 'latin_4': 'iso-8859-4', - 'latin-4': 'iso-8859-4', - 'latin_5': 'iso-8859-9', - 'latin-5': 'iso-8859-9', - 'latin_6': 'iso-8859-10', - 'latin-6': 'iso-8859-10', - 'latin_7': 'iso-8859-13', - 'latin-7': 'iso-8859-13', - 'latin_8': 'iso-8859-14', - 'latin-8': 'iso-8859-14', - 'latin_9': 'iso-8859-15', - 'latin-9': 'iso-8859-15', - 'latin_10':'iso-8859-16', - 'latin-10':'iso-8859-16', - 'cp949': 'ks_c_5601-1987', - 'euc_jp': 'euc-jp', - 'euc_kr': 'euc-kr', - 'ascii': 'us-ascii', - } - - -# Map charsets to their Unicode codec strings. -CODEC_MAP = { - 'gb2312': 'eucgb2312_cn', - 'big5': 'big5_tw', - # Hack: We don't want *any* conversion for stuff marked us-ascii, as all - # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. - # Let that stuff pass through without conversion to/from Unicode. - 'us-ascii': None, - } - - - -# Convenience functions for extending the above mappings -def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): - """Add character set properties to the global registry. - - charset is the input character set, and must be the canonical name of a - character set. - - Optional header_enc and body_enc is either Charset.QP for - quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for - the shortest of qp or base64 encoding, or None for no encoding. SHORTEST - is only valid for header_enc. It describes how message headers and - message bodies in the input charset are to be encoded. Default is no - encoding. 
- - Optional output_charset is the character set that the output should be - in. Conversions will proceed from input charset, to Unicode, to the - output charset when the method Charset.convert() is called. The default - is to output in the same character set as the input. - - Both input_charset and output_charset must have Unicode codec entries in - the module's charset-to-codec mapping; use add_codec(charset, codecname) - to add codecs the module does not know about. See the codecs module's - documentation for more information. - """ - if body_enc == SHORTEST: - raise ValueError('SHORTEST not allowed for body_enc') - CHARSETS[charset] = (header_enc, body_enc, output_charset) - - -def add_alias(alias, canonical): - """Add a character set alias. - - alias is the alias name, e.g. latin-1 - canonical is the character set's canonical name, e.g. iso-8859-1 - """ - ALIASES[alias] = canonical - - -def add_codec(charset, codecname): - """Add a codec that map characters in the given charset to/from Unicode. - - charset is the canonical name of a character set. codecname is the name - of a Python codec, as appropriate for the second argument to the unicode() - built-in, or to the encode() method of a Unicode string. - """ - CODEC_MAP[charset] = codecname - - - -class Charset: - """Map character sets to their email properties. - - This class provides information about the requirements imposed on email - for a specific character set. It also provides convenience routines for - converting between character sets, given the availability of the - applicable codecs. Given a character set, it will do its best to provide - information on how to use that character set in an email in an - RFC-compliant way. - - Certain character sets must be encoded with quoted-printable or base64 - when used in email headers or bodies. Certain character sets must be - converted outright, and are not allowed in email. 
Instances of this - module expose the following information about a character set: - - input_charset: The initial character set specified. Common aliases - are converted to their `official' email names (e.g. latin_1 - is converted to iso-8859-1). Defaults to 7-bit us-ascii. - - header_encoding: If the character set must be encoded before it can be - used in an email header, this attribute will be set to - Charset.QP (for quoted-printable), Charset.BASE64 (for - base64 encoding), or Charset.SHORTEST for the shortest of - QP or BASE64 encoding. Otherwise, it will be None. - - body_encoding: Same as header_encoding, but describes the encoding for the - mail message's body, which indeed may be different than the - header encoding. Charset.SHORTEST is not allowed for - body_encoding. - - output_charset: Some character sets must be converted before they can be - used in email headers or bodies. If the input_charset is - one of them, this attribute will contain the name of the - charset output will be converted to. Otherwise, it will - be None. - - input_codec: The name of the Python codec used to convert the - input_charset to Unicode. If no conversion codec is - necessary, this attribute will be None. - - output_codec: The name of the Python codec used to convert Unicode - to the output_charset. If no conversion codec is necessary, - this attribute will have the same value as the input_codec. - """ - def __init__(self, input_charset=DEFAULT_CHARSET): - # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to - # unicode because its .lower() is locale insensitive. If the argument - # is already a unicode, we leave it at that, but ensure that the - # charset is ASCII, as the standard (RFC XXX) requires. 
- try: - if isinstance(input_charset, unicode): - input_charset.encode('ascii') - else: - input_charset = unicode(input_charset, 'ascii') - except UnicodeError: - raise errors.CharsetError(input_charset) - input_charset = input_charset.lower().encode('ascii') - # Set the input charset after filtering through the aliases and/or codecs - if not (input_charset in ALIASES or input_charset in CHARSETS): - try: - input_charset = codecs.lookup(input_charset).name - except LookupError: - pass - self.input_charset = ALIASES.get(input_charset, input_charset) - # We can try to guess which encoding and conversion to use by the - # charset_map dictionary. Try that first, but let the user override - # it. - henc, benc, conv = CHARSETS.get(self.input_charset, - (SHORTEST, BASE64, None)) - if not conv: - conv = self.input_charset - # Set the attributes, allowing the arguments to override the default. - self.header_encoding = henc - self.body_encoding = benc - self.output_charset = ALIASES.get(conv, conv) - # Now set the codecs. If one isn't defined for input_charset, - # guess and try a Unicode codec with the same name as input_codec. - self.input_codec = CODEC_MAP.get(self.input_charset, - self.input_charset) - self.output_codec = CODEC_MAP.get(self.output_charset, - self.output_charset) - - def __str__(self): - return self.input_charset.lower() - - __repr__ = __str__ - - def __eq__(self, other): - return str(self) == str(other).lower() - - def __ne__(self, other): - return not self.__eq__(other) - - def get_body_encoding(self): - """Return the content-transfer-encoding used for body encoding. - - This is either the string `quoted-printable' or `base64' depending on - the encoding used, or it is a function in which case you should call - the function with a single argument, the Message object being - encoded. The function should then set the Content-Transfer-Encoding - header itself to whatever is appropriate. - - Returns "quoted-printable" if self.body_encoding is QP. 
- Returns "base64" if self.body_encoding is BASE64. - Returns "7bit" otherwise. - """ - assert self.body_encoding != SHORTEST - if self.body_encoding == QP: - return 'quoted-printable' - elif self.body_encoding == BASE64: - return 'base64' - else: - return encode_7or8bit - - def convert(self, s): - """Convert a string from the input_codec to the output_codec.""" - if self.input_codec != self.output_codec: - return unicode(s, self.input_codec).encode(self.output_codec) - else: - return s - - def to_splittable(self, s): - """Convert a possibly multibyte string to a safely splittable format. - - Uses the input_codec to try and convert the string to Unicode, so it - can be safely split on character boundaries (even for multibyte - characters). - - Returns the string as-is if it isn't known how to convert it to - Unicode with the input_charset. - - Characters that could not be converted to Unicode will be replaced - with the Unicode replacement character U+FFFD. - """ - if isinstance(s, unicode) or self.input_codec is None: - return s - try: - return unicode(s, self.input_codec, 'replace') - except LookupError: - # Input codec not installed on system, so return the original - # string unchanged. - return s - - def from_splittable(self, ustr, to_output=True): - """Convert a splittable string back into an encoded string. - - Uses the proper codec to try and convert the string from Unicode back - into an encoded format. Return the string as-is if it is not Unicode, - or if it could not be converted from Unicode. - - Characters that could not be converted from Unicode will be replaced - with an appropriate character (usually '?'). - - If to_output is True (the default), uses output_codec to convert to an - encoded format. If to_output is False, uses input_codec. 
- """ - if to_output: - codec = self.output_codec - else: - codec = self.input_codec - if not isinstance(ustr, unicode) or codec is None: - return ustr - try: - return ustr.encode(codec, 'replace') - except LookupError: - # Output codec not installed - return ustr - - def get_output_charset(self): - """Return the output character set. - - This is self.output_charset if that is not None, otherwise it is - self.input_charset. - """ - return self.output_charset or self.input_charset - - def encoded_header_len(self, s): - """Return the length of the encoded header string.""" - cset = self.get_output_charset() - # The len(s) of a 7bit encoding is len(s) - if self.header_encoding == BASE64: - return email.base64mime.base64_len(s) + len(cset) + MISC_LEN - elif self.header_encoding == QP: - return email.quoprimime.header_quopri_len(s) + len(cset) + MISC_LEN - elif self.header_encoding == SHORTEST: - lenb64 = email.base64mime.base64_len(s) - lenqp = email.quoprimime.header_quopri_len(s) - return min(lenb64, lenqp) + len(cset) + MISC_LEN - else: - return len(s) - - def header_encode(self, s, convert=False): - """Header-encode a string, optionally converting it to output_charset. - - If convert is True, the string will be converted from the input - charset to the output charset automatically. This is not useful for - multibyte character sets, which have line length issues (multibyte - characters must be split on a character, not a byte boundary); use the - high-level Header class to deal with these issues. convert defaults - to False. - - The type of encoding (base64 or quoted-printable) will be based on - self.header_encoding. 
- """ - cset = self.get_output_charset() - if convert: - s = self.convert(s) - # 7bit/8bit encodings return the string unchanged (modulo conversions) - if self.header_encoding == BASE64: - return email.base64mime.header_encode(s, cset) - elif self.header_encoding == QP: - return email.quoprimime.header_encode(s, cset, maxlinelen=None) - elif self.header_encoding == SHORTEST: - lenb64 = email.base64mime.base64_len(s) - lenqp = email.quoprimime.header_quopri_len(s) - if lenb64 < lenqp: - return email.base64mime.header_encode(s, cset) - else: - return email.quoprimime.header_encode(s, cset, maxlinelen=None) - else: - return s - - def body_encode(self, s, convert=True): - """Body-encode a string and convert it to output_charset. - - If convert is True (the default), the string will be converted from - the input charset to output charset automatically. Unlike - header_encode(), there are no issues with byte boundaries and - multibyte charsets in email bodies, so this is usually pretty safe. - - The type of encoding (base64 or quoted-printable) will be based on - self.body_encoding. 
- """ - if convert: - s = self.convert(s) - # 7bit/8bit encodings return the string unchanged (module conversions) - if self.body_encoding is BASE64: - return email.base64mime.body_encode(s) - elif self.body_encoding is QP: - return email.quoprimime.body_encode(s) - else: - return s diff --git a/python/Lib/email/encoders.py b/python/Lib/email/encoders.py deleted file mode 100755 index af45e62c33..0000000000 --- a/python/Lib/email/encoders.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Encodings and related functions.""" - -__all__ = [ - 'encode_7or8bit', - 'encode_base64', - 'encode_noop', - 'encode_quopri', - ] - -import base64 - -from quopri import encodestring as _encodestring - - - -def _qencode(s): - enc = _encodestring(s, quotetabs=True) - # Must encode spaces, which quopri.encodestring() doesn't do - return enc.replace(' ', '=20') - - -def _bencode(s): - # We can't quite use base64.encodestring() since it tacks on a "courtesy - # newline". Blech! - if not s: - return s - hasnewline = (s[-1] == '\n') - value = base64.encodestring(s) - if not hasnewline and value[-1] == '\n': - return value[:-1] - return value - - - -def encode_base64(msg): - """Encode the message's payload in Base64. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = _bencode(orig) - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'base64' - - - -def encode_quopri(msg): - """Encode the message's payload in quoted-printable. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = _qencode(orig) - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'quoted-printable' - - - -def encode_7or8bit(msg): - """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" - orig = msg.get_payload() - if orig is None: - # There's no payload. 
For backwards compatibility we use 7bit - msg['Content-Transfer-Encoding'] = '7bit' - return - # We play a trick to make this go fast. If encoding to ASCII succeeds, we - # know the data must be 7bit, otherwise treat it as 8bit. - try: - orig.encode('ascii') - except UnicodeError: - msg['Content-Transfer-Encoding'] = '8bit' - else: - msg['Content-Transfer-Encoding'] = '7bit' - - - -def encode_noop(msg): - """Do nothing.""" diff --git a/python/Lib/email/errors.py b/python/Lib/email/errors.py deleted file mode 100755 index d52a624601..0000000000 --- a/python/Lib/email/errors.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""email package exception classes.""" - - - -class MessageError(Exception): - """Base class for errors in the email package.""" - - -class MessageParseError(MessageError): - """Base class for message parsing errors.""" - - -class HeaderParseError(MessageParseError): - """Error while parsing headers.""" - - -class BoundaryError(MessageParseError): - """Couldn't find terminating boundary.""" - - -class MultipartConversionError(MessageError, TypeError): - """Conversion to a multipart is prohibited.""" - - -class CharsetError(MessageError): - """An illegal charset was given.""" - - - -# These are parsing defects which the parser was able to work around. 
-class MessageDefect: - """Base class for a message defect.""" - - def __init__(self, line=None): - self.line = line - -class NoBoundaryInMultipartDefect(MessageDefect): - """A message claimed to be a multipart but had no boundary parameter.""" - -class StartBoundaryNotFoundDefect(MessageDefect): - """The claimed start boundary was never found.""" - -class FirstHeaderLineIsContinuationDefect(MessageDefect): - """A message had a continuation line as its first header line.""" - -class MisplacedEnvelopeHeaderDefect(MessageDefect): - """A 'Unix-from' header was found in the middle of a header block.""" - -class MalformedHeaderDefect(MessageDefect): - """Found a header that was missing a colon, or was otherwise malformed.""" - -class MultipartInvariantViolationDefect(MessageDefect): - """A message claimed to be a multipart but no subparts were found.""" diff --git a/python/Lib/email/feedparser.py b/python/Lib/email/feedparser.py deleted file mode 100755 index 8031ca666e..0000000000 --- a/python/Lib/email/feedparser.py +++ /dev/null @@ -1,505 +0,0 @@ -# Copyright (C) 2004-2006 Python Software Foundation -# Authors: Baxter, Wouters and Warsaw -# Contact: email-sig@python.org - -"""FeedParser - An email feed parser. - -The feed parser implements an interface for incrementally parsing an email -message, line by line. This has advantages for certain applications, such as -those reading email messages off a socket. - -FeedParser.feed() is the primary interface for pushing new data into the -parser. It returns when there's nothing more it can do with the available -data. When you have no more data to push into the parser, call .close(). -This completes the parsing and returns the root message object. - -The other advantage of this parser is that it will never raise a parsing -exception. Instead, when it finds something unexpected, it adds a 'defect' to -the current message. Defects are just instances that live on the message -object's .defects attribute. 
-""" - -__all__ = ['FeedParser'] - -import re - -from email import errors -from email import message - -NLCRE = re.compile('\r\n|\r|\n') -NLCRE_bol = re.compile('(\r\n|\r|\n)') -NLCRE_eol = re.compile('(\r\n|\r|\n)\Z') -NLCRE_crack = re.compile('(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character -# except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') -EMPTYSTRING = '' -NL = '\n' - -NeedMoreData = object() - - - -class BufferedSubFile(object): - """A file-ish object that can have new data loaded into it. - - You can also push and pop line-matching predicates onto a stack. When the - current predicate matches the current line, a false EOF response - (i.e. empty string) is returned instead. This lets the parser adhere to a - simple abstraction -- it parses until EOF closes the current message. - """ - def __init__(self): - # Chunks of the last partial line pushed into this object. - self._partial = [] - # The list of full, pushed lines, in reverse order - self._lines = [] - # The stack of false-EOF checking predicates. - self._eofstack = [] - # A flag indicating whether the file has been closed or not. - self._closed = False - - def push_eof_matcher(self, pred): - self._eofstack.append(pred) - - def pop_eof_matcher(self): - return self._eofstack.pop() - - def close(self): - # Don't forget any trailing partial line. - self.pushlines(''.join(self._partial).splitlines(True)) - self._partial = [] - self._closed = True - - def readline(self): - if not self._lines: - if self._closed: - return '' - return NeedMoreData - # Pop the line off the stack and see if it matches the current - # false-EOF predicate. - line = self._lines.pop() - # RFC 2046, section 5.1.2 requires us to recognize outer level - # boundaries at any level of inner nesting. Do this, but be sure it's - # in the order of most to least nested. 
- for ateof in self._eofstack[::-1]: - if ateof(line): - # We're at the false EOF. But push the last line back first. - self._lines.append(line) - return '' - return line - - def unreadline(self, line): - # Let the consumer push a line back into the buffer. - assert line is not NeedMoreData - self._lines.append(line) - - def push(self, data): - """Push some new data into this object.""" - # Crack into lines, but preserve the linesep characters on the end of each - parts = data.splitlines(True) - - if not parts or not parts[0].endswith(('\n', '\r')): - # No new complete lines, so just accumulate partials - self._partial += parts - return - - if self._partial: - # If there are previous leftovers, complete them now - self._partial.append(parts[0]) - parts[0:1] = ''.join(self._partial).splitlines(True) - del self._partial[:] - - # If the last element of the list does not end in a newline, then treat - # it as a partial line. We only check for '\n' here because a line - # ending with '\r' might be a line that was split in the middle of a - # '\r\n' sequence (see bugs 1555570 and 1721862). - if not parts[-1].endswith('\n'): - self._partial = [parts.pop()] - self.pushlines(parts) - - def pushlines(self, lines): - # Crack into lines, but preserve the newlines on the end of each - parts = NLCRE_crack.split(data) - # The *ahem* interesting behaviour of re.split when supplied grouping - # parentheses is that the last element of the resulting list is the - # data after the final RE. In the case of a NL/CR terminated string, - # this is the empty string. - self._partial = parts.pop() - #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r: - # is there a \n to follow later? - if not self._partial and parts and parts[-1].endswith('\r'): - self._partial = parts.pop(-2)+parts.pop() - # parts is a list of strings, alternating between the line contents - # and the eol character(s). Gather up a list of lines after - # re-attaching the newlines. 
- lines = [] - for i in range(len(parts) // 2): - lines.append(parts[i*2] + parts[i*2+1]) - self.pushlines(lines) - - def pushlines(self, lines): - # Reverse and insert at the front of the lines. - self._lines[:0] = lines[::-1] - - def is_closed(self): - return self._closed - - def __iter__(self): - return self - - def next(self): - line = self.readline() - if line == '': - raise StopIteration - return line - - - -class FeedParser: - """A feed-style parser of email.""" - - def __init__(self, _factory=message.Message): - """_factory is called with no arguments to create a new message obj""" - self._factory = _factory - self._input = BufferedSubFile() - self._msgstack = [] - self._parse = self._parsegen().next - self._cur = None - self._last = None - self._headersonly = False - - # Non-public interface for supporting Parser's headersonly flag - def _set_headersonly(self): - self._headersonly = True - - def feed(self, data): - """Push more data into the parser.""" - self._input.push(data) - self._call_parse() - - def _call_parse(self): - try: - self._parse() - except StopIteration: - pass - - def close(self): - """Parse all remaining data and return the root message object.""" - self._input.close() - self._call_parse() - root = self._pop_message() - assert not self._msgstack - # Look for final set of defects - if root.get_content_maintype() == 'multipart' \ - and not root.is_multipart(): - root.defects.append(errors.MultipartInvariantViolationDefect()) - return root - - def _new_message(self): - msg = self._factory() - if self._cur and self._cur.get_content_type() == 'multipart/digest': - msg.set_default_type('message/rfc822') - if self._msgstack: - self._msgstack[-1].attach(msg) - self._msgstack.append(msg) - self._cur = msg - self._last = msg - - def _pop_message(self): - retval = self._msgstack.pop() - if self._msgstack: - self._cur = self._msgstack[-1] - else: - self._cur = None - return retval - - def _parsegen(self): - # Create a new message and start by parsing 
headers. - self._new_message() - headers = [] - # Collect the headers, searching for a line that doesn't match the RFC - # 2822 header or continuation pattern (including an empty line). - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - if not headerRE.match(line): - # If we saw the RFC defined header/body separator - # (i.e. newline), just throw it away. Otherwise the line is - # part of the body so push it back. - if not NLCRE.match(line): - self._input.unreadline(line) - break - headers.append(line) - # Done with the headers, so parse them and figure out what we're - # supposed to see in the body of the message. - self._parse_headers(headers) - # Headers-only parsing is a backwards compatibility hack, which was - # necessary in the older parser, which could raise errors. All - # remaining lines in the input are thrown into the message body. - if self._headersonly: - lines = [] - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - if self._cur.get_content_type() == 'message/delivery-status': - # message/delivery-status contains blocks of headers separated by - # a blank line. We'll represent each header block as a separate - # nested message object, but the processing is a bit different - # than standard message/* types because there is no body for the - # nested messages. A blank line separates the subparts. - while True: - self._input.push_eof_matcher(NLCRE.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - msg = self._pop_message() - # We need to pop the EOF matcher in order to tell if we're at - # the end of the current file, not the end of the last block - # of message headers. - self._input.pop_eof_matcher() - # The input stream must be sitting at the newline or at the - # EOF. 
We want to see if we're at the end of this subpart, so - # first consume the blank line, then test the next line to see - # if we're at this subpart's EOF. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - if line == '': - break - # Not at EOF so this is a line we're going to need. - self._input.unreadline(line) - return - if self._cur.get_content_maintype() == 'message': - # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - self._pop_message() - return - if self._cur.get_content_maintype() == 'multipart': - boundary = self._cur.get_boundary() - if boundary is None: - # The message /claims/ to be a multipart but it has not - # defined a boundary. That's a problem which we'll handle by - # reading everything until the EOF and marking the message as - # defective. - self._cur.defects.append(errors.NoBoundaryInMultipartDefect()) - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - # Create a line match predicate which matches the inter-part - # boundary as well as the end-of-multipart boundary. Don't push - # this onto the input stream until we've scanned past the - # preamble. - separator = '--' + boundary - boundaryre = re.compile( - '(?P' + re.escape(separator) + - r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') - capturing_preamble = True - preamble = [] - linesep = False - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - mo = boundaryre.match(line) - if mo: - # If we're looking at the end boundary, we're done with - # this multipart. 
If there was a newline at the end of - # the closing boundary, then we need to initialize the - # epilogue with the empty string (see below). - if mo.group('end'): - linesep = mo.group('linesep') - break - # We saw an inter-part boundary. Were we in the preamble? - if capturing_preamble: - if preamble: - # According to RFC 2046, the last newline belongs - # to the boundary. - lastline = preamble[-1] - eolmo = NLCRE_eol.search(lastline) - if eolmo: - preamble[-1] = lastline[:-len(eolmo.group(0))] - self._cur.preamble = EMPTYSTRING.join(preamble) - capturing_preamble = False - self._input.unreadline(line) - continue - # We saw a boundary separating two parts. Consume any - # multiple boundary lines that may be following. Our - # interpretation of RFC 2046 BNF grammar does not produce - # body parts within such double boundaries. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - mo = boundaryre.match(line) - if not mo: - self._input.unreadline(line) - break - # Recurse to parse this subpart; the input stream points - # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - # Because of RFC 2046, the newline preceding the boundary - # separator actually belongs to the boundary, not the - # previous subpart's payload (or epilogue if the previous - # part is a multipart). 
- if self._last.get_content_maintype() == 'multipart': - epilogue = self._last.epilogue - if epilogue == '': - self._last.epilogue = None - elif epilogue is not None: - mo = NLCRE_eol.search(epilogue) - if mo: - end = len(mo.group(0)) - self._last.epilogue = epilogue[:-end] - else: - payload = self._last.get_payload() - if isinstance(payload, basestring): - mo = NLCRE_eol.search(payload) - if mo: - payload = payload[:-len(mo.group(0))] - self._last.set_payload(payload) - self._input.pop_eof_matcher() - self._pop_message() - # Set the multipart up for newline cleansing, which will - # happen if we're in a nested multipart. - self._last = self._cur - else: - # I think we must be in the preamble - assert capturing_preamble - preamble.append(line) - # We've seen either the EOF or the end boundary. If we're still - # capturing the preamble, we never saw the start boundary. Note - # that as a defect and store the captured text as the payload. - # Everything from here to the EOF is epilogue. - if capturing_preamble: - self._cur.defects.append(errors.StartBoundaryNotFoundDefect()) - self._cur.set_payload(EMPTYSTRING.join(preamble)) - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # If the end boundary ended in a newline, we'll need to make sure - # the epilogue isn't None - if linesep: - epilogue = [''] - else: - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - epilogue.append(line) - # Any CRLF at the front of the epilogue is not technically part of - # the epilogue. Also, watch out for an empty string epilogue, - # which means a single newline. 
- if epilogue: - firstline = epilogue[0] - bolmo = NLCRE_bol.match(firstline) - if bolmo: - epilogue[0] = firstline[len(bolmo.group(0)):] - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # Otherwise, it's some non-multipart type, so the entire rest of the - # file contents becomes the payload. - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - - def _parse_headers(self, lines): - # Passed a list of lines that make up the headers for the current msg - lastheader = '' - lastvalue = [] - for lineno, line in enumerate(lines): - # Check for continuation - if line[0] in ' \t': - if not lastheader: - # The first line of the headers was a continuation. This - # is illegal, so let's note the defect, store the illegal - # line, and ignore it for purposes of headers. - defect = errors.FirstHeaderLineIsContinuationDefect(line) - self._cur.defects.append(defect) - continue - lastvalue.append(line) - continue - if lastheader: - # XXX reconsider the joining of folded lines - lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n') - self._cur[lastheader] = lhdr - lastheader, lastvalue = '', [] - # Check for envelope header, i.e. unix-from - if line.startswith('From '): - if lineno == 0: - # Strip off the trailing newline - mo = NLCRE_eol.search(line) - if mo: - line = line[:-len(mo.group(0))] - self._cur.set_unixfrom(line) - continue - elif lineno == len(lines) - 1: - # Something looking like a unix-from at the end - it's - # probably the first line of the body, so push back the - # line and stop. - self._input.unreadline(line) - return - else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. - defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) - continue - # Split the line on the colon separating field name from value. 
- i = line.find(':') - if i < 0: - defect = errors.MalformedHeaderDefect(line) - self._cur.defects.append(defect) - continue - lastheader = line[:i] - lastvalue = [line[i+1:].lstrip()] - # Done with all the lines, so handle the last header. - if lastheader: - # XXX reconsider the joining of folded lines - self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n') diff --git a/python/Lib/email/generator.py b/python/Lib/email/generator.py deleted file mode 100755 index e50f912c5a..0000000000 --- a/python/Lib/email/generator.py +++ /dev/null @@ -1,371 +0,0 @@ -# Copyright (C) 2001-2010 Python Software Foundation -# Contact: email-sig@python.org - -"""Classes to generate plain text from a message object tree.""" - -__all__ = ['Generator', 'DecodedGenerator'] - -import re -import sys -import time -import random -import warnings - -from cStringIO import StringIO -from email.header import Header - -UNDERSCORE = '_' -NL = '\n' - -fcre = re.compile(r'^From ', re.MULTILINE) - -def _is8bitstring(s): - if isinstance(s, str): - try: - unicode(s, 'us-ascii') - except UnicodeError: - return True - return False - - - -class Generator: - """Generates output from a Message object tree. - - This basic generator writes the message to the given file object as plain - text. - """ - # - # Public interface - # - - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): - """Create the generator for message flattening. - - outfp is the output file-like object for writing the message to. It - must have a write() method. - - Optional mangle_from_ is a flag that, when True (the default), escapes - From_ lines in the body of the message by putting a `>' in front of - them. - - Optional maxheaderlen specifies the longest length for a non-continued - header. When a header line is longer (in characters, with tabs - expanded to 8 spaces) than maxheaderlen, the header will split as - defined in the Header class. Set maxheaderlen to zero to disable - header wrapping. 
The default is 78, as recommended (but not required) - by RFC 2822. - """ - self._fp = outfp - self._mangle_from_ = mangle_from_ - self._maxheaderlen = maxheaderlen - - def write(self, s): - # Just delegate to the file object - self._fp.write(s) - - def flatten(self, msg, unixfrom=False): - """Print the message object tree rooted at msg to the output file - specified when the Generator instance was created. - - unixfrom is a flag that forces the printing of a Unix From_ delimiter - before the first object in the message tree. If the original message - has no From_ delimiter, a `standard' one is crafted. By default, this - is False to inhibit the printing of any From_ delimiter. - - Note that for subobjects, no From_ line is printed. - """ - if unixfrom: - ufrom = msg.get_unixfrom() - if not ufrom: - ufrom = 'From nobody ' + time.ctime(time.time()) - print >> self._fp, ufrom - self._write(msg) - - def clone(self, fp): - """Clone this generator with the exact same options.""" - return self.__class__(fp, self._mangle_from_, self._maxheaderlen) - - # - # Protected interface - undocumented ;/ - # - - def _write(self, msg): - # We can't write the headers yet because of the following scenario: - # say a multipart message includes the boundary string somewhere in - # its body. We'd have to calculate the new boundary /before/ we write - # the headers so that we can write the correct Content-Type: - # parameter. - # - # The way we do this, so as to make the _handle_*() methods simpler, - # is to cache any subpart writes into a StringIO. The we write the - # headers and the StringIO contents. That way, subpart handlers can - # Do The Right Thing, and can still modify the Content-Type: header if - # necessary. - oldfp = self._fp - try: - self._fp = sfp = StringIO() - self._dispatch(msg) - finally: - self._fp = oldfp - # Write the headers. First we see if the message object wants to - # handle that itself. If not, we'll do it generically. 
- meth = getattr(msg, '_write_headers', None) - if meth is None: - self._write_headers(msg) - else: - meth(self) - self._fp.write(sfp.getvalue()) - - def _dispatch(self, msg): - # Get the Content-Type: for the message, then try to dispatch to - # self._handle__(). If there's no handler for the - # full MIME type, then dispatch to self._handle_(). If - # that's missing too, then dispatch to self._writeBody(). - main = msg.get_content_maintype() - sub = msg.get_content_subtype() - specific = UNDERSCORE.join((main, sub)).replace('-', '_') - meth = getattr(self, '_handle_' + specific, None) - if meth is None: - generic = main.replace('-', '_') - meth = getattr(self, '_handle_' + generic, None) - if meth is None: - meth = self._writeBody - meth(msg) - - # - # Default handlers - # - - def _write_headers(self, msg): - for h, v in msg.items(): - print >> self._fp, '%s:' % h, - if self._maxheaderlen == 0: - # Explicit no-wrapping - print >> self._fp, v - elif isinstance(v, Header): - # Header instances know what to do - print >> self._fp, v.encode() - elif _is8bitstring(v): - # If we have raw 8bit data in a byte string, we have no idea - # what the encoding is. There is no safe way to split this - # string. If it's ascii-subset, then we could do a normal - # ascii split, but if it's multibyte then we could break the - # string. There's no way to know so the least harm seems to - # be to not split the string and risk it being too long. - print >> self._fp, v - else: - # Header's got lots of smarts, so use it. Note that this is - # fundamentally broken though because we lose idempotency when - # the header string is continued with tabs. It will now be - # continued with spaces. This was reversedly broken before we - # fixed bug 1974. Either way, we lose. 
- print >> self._fp, Header( - v, maxlinelen=self._maxheaderlen, header_name=h).encode() - # A blank line always separates headers from body - print >> self._fp - - # - # Handlers for writing types and subtypes - # - - def _handle_text(self, msg): - payload = msg.get_payload() - if payload is None: - return - if not isinstance(payload, basestring): - raise TypeError('string payload expected: %s' % type(payload)) - if self._mangle_from_: - payload = fcre.sub('>From ', payload) - self._fp.write(payload) - - # Default body handler - _writeBody = _handle_text - - def _handle_multipart(self, msg): - # The trick here is to write out each part separately, merge them all - # together, and then make sure that the boundary we've chosen isn't - # present in the payload. - msgtexts = [] - subparts = msg.get_payload() - if subparts is None: - subparts = [] - elif isinstance(subparts, basestring): - # e.g. a non-strict parse of a message with no starting boundary. - self._fp.write(subparts) - return - elif not isinstance(subparts, list): - # Scalar payload - subparts = [subparts] - for part in subparts: - s = StringIO() - g = self.clone(s) - g.flatten(part, unixfrom=False) - msgtexts.append(s.getvalue()) - # BAW: What about boundaries that are wrapped in double-quotes? - boundary = msg.get_boundary() - if not boundary: - # Create a boundary that doesn't appear in any of the - # message texts. 
- alltext = NL.join(msgtexts) - boundary = _make_boundary(alltext) - msg.set_boundary(boundary) - # If there's a preamble, write it out, with a trailing CRLF - if msg.preamble is not None: - if self._mangle_from_: - preamble = fcre.sub('>From ', msg.preamble) - else: - preamble = msg.preamble - print >> self._fp, preamble - # dash-boundary transport-padding CRLF - print >> self._fp, '--' + boundary - # body-part - if msgtexts: - self._fp.write(msgtexts.pop(0)) - # *encapsulation - # --> delimiter transport-padding - # --> CRLF body-part - for body_part in msgtexts: - # delimiter transport-padding CRLF - print >> self._fp, '\n--' + boundary - # body-part - self._fp.write(body_part) - # close-delimiter transport-padding - self._fp.write('\n--' + boundary + '--' + NL) - if msg.epilogue is not None: - if self._mangle_from_: - epilogue = fcre.sub('>From ', msg.epilogue) - else: - epilogue = msg.epilogue - self._fp.write(epilogue) - - def _handle_multipart_signed(self, msg): - # The contents of signed parts has to stay unmodified in order to keep - # the signature intact per RFC1847 2.1, so we disable header wrapping. - # RDM: This isn't enough to completely preserve the part, but it helps. - old_maxheaderlen = self._maxheaderlen - try: - self._maxheaderlen = 0 - self._handle_multipart(msg) - finally: - self._maxheaderlen = old_maxheaderlen - - def _handle_message_delivery_status(self, msg): - # We can't just write the headers directly to self's file object - # because this will leave an extra newline between the last header - # block and the boundary. Sigh. - blocks = [] - for part in msg.get_payload(): - s = StringIO() - g = self.clone(s) - g.flatten(part, unixfrom=False) - text = s.getvalue() - lines = text.split('\n') - # Strip off the unnecessary trailing empty line - if lines and lines[-1] == '': - blocks.append(NL.join(lines[:-1])) - else: - blocks.append(text) - # Now join all the blocks with an empty line. 
This has the lovely - # effect of separating each block with an empty line, but not adding - # an extra one after the last one. - self._fp.write(NL.join(blocks)) - - def _handle_message(self, msg): - s = StringIO() - g = self.clone(s) - # The payload of a message/rfc822 part should be a multipart sequence - # of length 1. The zeroth element of the list should be the Message - # object for the subpart. Extract that object, stringify it, and - # write it out. - # Except, it turns out, when it's a string instead, which happens when - # and only when HeaderParser is used on a message of mime type - # message/rfc822. Such messages are generated by, for example, - # Groupwise when forwarding unadorned messages. (Issue 7970.) So - # in that case we just emit the string body. - payload = msg.get_payload() - if isinstance(payload, list): - g.flatten(msg.get_payload(0), unixfrom=False) - payload = s.getvalue() - self._fp.write(payload) - - - -_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' - -class DecodedGenerator(Generator): - """Generates a text representation of a message. - - Like the Generator base class, except that non-text parts are substituted - with a format string representing the part. - """ - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): - """Like Generator.__init__() except that an additional optional - argument is allowed. - - Walks through all subparts of a message. If the subpart is of main - type `text', then it prints the decoded payload of the subpart. - - Otherwise, fmt is a format string that is used instead of the message - payload. 
fmt is expanded with the following keywords (in - %(keyword)s format): - - type : Full MIME type of the non-text part - maintype : Main MIME type of the non-text part - subtype : Sub-MIME type of the non-text part - filename : Filename of the non-text part - description: Description associated with the non-text part - encoding : Content transfer encoding of the non-text part - - The default value for fmt is None, meaning - - [Non-text (%(type)s) part of message omitted, filename %(filename)s] - """ - Generator.__init__(self, outfp, mangle_from_, maxheaderlen) - if fmt is None: - self._fmt = _FMT - else: - self._fmt = fmt - - def _dispatch(self, msg): - for part in msg.walk(): - maintype = part.get_content_maintype() - if maintype == 'text': - print >> self, part.get_payload(decode=True) - elif maintype == 'multipart': - # Just skip this - pass - else: - print >> self, self._fmt % { - 'type' : part.get_content_type(), - 'maintype' : part.get_content_maintype(), - 'subtype' : part.get_content_subtype(), - 'filename' : part.get_filename('[no filename]'), - 'description': part.get('Content-Description', - '[no description]'), - 'encoding' : part.get('Content-Transfer-Encoding', - '[no encoding]'), - } - - - -# Helper -_width = len(repr(sys.maxint-1)) -_fmt = '%%0%dd' % _width - -def _make_boundary(text=None): - # Craft a random boundary. If text is given, ensure that the chosen - # boundary doesn't appear in the text. - token = random.randrange(sys.maxint) - boundary = ('=' * 15) + (_fmt % token) + '==' - if text is None: - return boundary - b = boundary - counter = 0 - while True: - cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) - if not cre.search(text): - break - b = boundary + '.' 
+ str(counter) - counter += 1 - return b diff --git a/python/Lib/email/header.py b/python/Lib/email/header.py deleted file mode 100755 index 2cf870fd57..0000000000 --- a/python/Lib/email/header.py +++ /dev/null @@ -1,514 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Ben Gertzfield, Barry Warsaw -# Contact: email-sig@python.org - -"""Header encoding and decoding functionality.""" - -__all__ = [ - 'Header', - 'decode_header', - 'make_header', - ] - -import re -import binascii - -import email.quoprimime -import email.base64mime - -from email.errors import HeaderParseError -from email.charset import Charset - -NL = '\n' -SPACE = ' ' -USPACE = u' ' -SPACE8 = ' ' * 8 -UEMPTYSTRING = u'' - -MAXLINELEN = 76 - -USASCII = Charset('us-ascii') -UTF8 = Charset('utf-8') - -# Match encoded-word strings in the form =?charset?q?Hello_World?= -ecre = re.compile(r''' - =\? # literal =? - (?P[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P.*?) # non-greedy up to the next ?= is the encoded string - \?= # literal ?= - (?=[ \t]|$) # whitespace or the end of the string - ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) - -# Field name regexp, including trailing colon, but not separating whitespace, -# according to RFC 2822. Character range is from tilde to exclamation mark. -# For use with .match() -fcre = re.compile(r'[\041-\176]+:$') - -# Find a header embedded in a putative header value. Used to check for -# header injection attack. -_embeded_header = re.compile(r'\n[^ \t]+:') - - - -# Helpers -_max_append = email.quoprimime._max_append - - - -def decode_header(header): - """Decode a message header value without converting charset. - - Returns a list of (decoded_string, charset) pairs containing each of the - decoded parts of the header. 
Charset is None for non-encoded parts of the - header, otherwise a lower-case string containing the name of the character - set specified in the encoded string. - - An email.errors.HeaderParseError may be raised when certain decoding error - occurs (e.g. a base64 decoding exception). - """ - # If no encoding, just return the header - header = str(header) - if not ecre.search(header): - return [(header, None)] - decoded = [] - dec = '' - for line in header.splitlines(): - # This line might not have an encoding in it - if not ecre.search(line): - decoded.append((line, None)) - continue - parts = ecre.split(line) - while parts: - unenc = parts.pop(0).strip() - if unenc: - # Should we continue a long line? - if decoded and decoded[-1][1] is None: - decoded[-1] = (decoded[-1][0] + SPACE + unenc, None) - else: - decoded.append((unenc, None)) - if parts: - charset, encoding = [s.lower() for s in parts[0:2]] - encoded = parts[2] - dec = None - if encoding == 'q': - dec = email.quoprimime.header_decode(encoded) - elif encoding == 'b': - paderr = len(encoded) % 4 # Postel's law: add missing padding - if paderr: - encoded += '==='[:4 - paderr] - try: - dec = email.base64mime.decode(encoded) - except binascii.Error: - # Turn this into a higher level exception. BAW: Right - # now we throw the lower level exception away but - # when/if we get exception chaining, we'll preserve it. - raise HeaderParseError - if dec is None: - dec = encoded - - if decoded and decoded[-1][1] == charset: - decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) - else: - decoded.append((dec, charset)) - del parts[0:3] - return decoded - - - -def make_header(decoded_seq, maxlinelen=None, header_name=None, - continuation_ws=' '): - """Create a Header from a sequence of pairs as returned by decode_header() - - decode_header() takes a header value string and returns a sequence of - pairs of the format (decoded_string, charset) where charset is the string - name of the character set. 
- - This function takes one of those sequence of pairs and returns a Header - instance. Optional maxlinelen, header_name, and continuation_ws are as in - the Header constructor. - """ - h = Header(maxlinelen=maxlinelen, header_name=header_name, - continuation_ws=continuation_ws) - for s, charset in decoded_seq: - # None means us-ascii but we can simply pass it on to h.append() - if charset is not None and not isinstance(charset, Charset): - charset = Charset(charset) - h.append(s, charset) - return h - - - -class Header: - def __init__(self, s=None, charset=None, - maxlinelen=None, header_name=None, - continuation_ws=' ', errors='strict'): - """Create a MIME-compliant header that can contain many character sets. - - Optional s is the initial header value. If None, the initial header - value is not set. You can later append to the header with .append() - method calls. s may be a byte string or a Unicode string, but see the - .append() documentation for semantics. - - Optional charset serves two purposes: it has the same meaning as the - charset argument to the .append() method. It also sets the default - character set for all subsequent .append() calls that omit the charset - argument. If charset is not provided in the constructor, the us-ascii - charset is used both as s's initial charset and as the default for - subsequent .append() calls. - - The maximum line length can be specified explicit via maxlinelen. For - splitting the first line to a shorter value (to account for the field - header which isn't included in s, e.g. `Subject') pass in the name of - the field in header_name. The default maxlinelen is 76. - - continuation_ws must be RFC 2822 compliant folding whitespace (usually - either a space or a hard tab) which will be prepended to continuation - lines. - - errors is passed through to the .append() call. 
- """ - if charset is None: - charset = USASCII - if not isinstance(charset, Charset): - charset = Charset(charset) - self._charset = charset - self._continuation_ws = continuation_ws - cws_expanded_len = len(continuation_ws.replace('\t', SPACE8)) - # BAW: I believe `chunks' and `maxlinelen' should be non-public. - self._chunks = [] - if s is not None: - self.append(s, charset, errors) - if maxlinelen is None: - maxlinelen = MAXLINELEN - if header_name is None: - # We don't know anything about the field header so the first line - # is the same length as subsequent lines. - self._firstlinelen = maxlinelen - else: - # The first line should be shorter to take into account the field - # header. Also subtract off 2 extra for the colon and space. - self._firstlinelen = maxlinelen - len(header_name) - 2 - # Second and subsequent lines should subtract off the length in - # columns of the continuation whitespace prefix. - self._maxlinelen = maxlinelen - cws_expanded_len - - def __str__(self): - """A synonym for self.encode().""" - return self.encode() - - def __unicode__(self): - """Helper for the built-in unicode function.""" - uchunks = [] - lastcs = None - for s, charset in self._chunks: - # We must preserve spaces between encoded and non-encoded word - # boundaries, which means for us we need to add a space when we go - # from a charset to None/us-ascii, or from None/us-ascii to a - # charset. Only do this for the second and subsequent chunks. - nextcs = charset - if uchunks: - if lastcs not in (None, 'us-ascii'): - if nextcs in (None, 'us-ascii'): - uchunks.append(USPACE) - nextcs = None - elif nextcs not in (None, 'us-ascii'): - uchunks.append(USPACE) - lastcs = nextcs - uchunks.append(unicode(s, str(charset))) - return UEMPTYSTRING.join(uchunks) - - # Rich comparison operators for equality only. BAW: does it make sense to - # have or explicitly disable <, <=, >, >= operators? - def __eq__(self, other): - # other may be a Header or a string. 
Both are fine so coerce - # ourselves to a string, swap the args and do another comparison. - return other == self.encode() - - def __ne__(self, other): - return not self == other - - def append(self, s, charset=None, errors='strict'): - """Append a string to the MIME header. - - Optional charset, if given, should be a Charset instance or the name - of a character set (which will be converted to a Charset instance). A - value of None (the default) means that the charset given in the - constructor is used. - - s may be a byte string or a Unicode string. If it is a byte string - (i.e. isinstance(s, str) is true), then charset is the encoding of - that byte string, and a UnicodeError will be raised if the string - cannot be decoded with that charset. If s is a Unicode string, then - charset is a hint specifying the character set of the characters in - the string. In this case, when producing an RFC 2822 compliant header - using RFC 2047 rules, the Unicode string will be encoded using the - following charsets in order: us-ascii, the charset hint, utf-8. The - first character set not to provoke a UnicodeError is used. - - Optional `errors' is passed as the third argument to any unicode() or - ustr.encode() call. - """ - if charset is None: - charset = self._charset - elif not isinstance(charset, Charset): - charset = Charset(charset) - # If the charset is our faux 8bit charset, leave the string unchanged - if charset != '8bit': - # We need to test that the string can be converted to unicode and - # back to a byte string, given the input and output codecs of the - # charset. - if isinstance(s, str): - # Possibly raise UnicodeError if the byte string can't be - # converted to a unicode with the input codec of the charset. - incodec = charset.input_codec or 'us-ascii' - ustr = unicode(s, incodec, errors) - # Now make sure that the unicode could be converted back to a - # byte string with the output codec, which may be different - # than the iput coded. 
Still, use the original byte string. - outcodec = charset.output_codec or 'us-ascii' - ustr.encode(outcodec, errors) - elif isinstance(s, unicode): - # Now we have to be sure the unicode string can be converted - # to a byte string with a reasonable output codec. We want to - # use the byte string in the chunk. - for charset in USASCII, charset, UTF8: - try: - outcodec = charset.output_codec or 'us-ascii' - s = s.encode(outcodec, errors) - break - except UnicodeError: - pass - else: - assert False, 'utf-8 conversion failed' - self._chunks.append((s, charset)) - - def _split(self, s, charset, maxlinelen, splitchars): - # Split up a header safely for use with encode_chunks. - splittable = charset.to_splittable(s) - encoded = charset.from_splittable(splittable, True) - elen = charset.encoded_header_len(encoded) - # If the line's encoded length first, just return it - if elen <= maxlinelen: - return [(encoded, charset)] - # If we have undetermined raw 8bit characters sitting in a byte - # string, we really don't know what the right thing to do is. We - # can't really split it because it might be multibyte data which we - # could break if we split it between pairs. The least harm seems to - # be to not split the header at all, but that means they could go out - # longer than maxlinelen. - if charset == '8bit': - return [(s, charset)] - # BAW: I'm not sure what the right test here is. What we're trying to - # do is be faithful to RFC 2822's recommendation that ($2.2.3): - # - # "Note: Though structured field bodies are defined in such a way that - # folding can take place between many of the lexical tokens (and even - # within some of the lexical tokens), folding SHOULD be limited to - # placing the CRLF at higher-level syntactic breaks." - # - # For now, I can only imagine doing this when the charset is us-ascii, - # although it's possible that other charsets may also benefit from the - # higher-level syntactic breaks. 
- elif charset == 'us-ascii': - return self._split_ascii(s, charset, maxlinelen, splitchars) - # BAW: should we use encoded? - elif elen == len(s): - # We can split on _maxlinelen boundaries because we know that the - # encoding won't change the size of the string - splitpnt = maxlinelen - first = charset.from_splittable(splittable[:splitpnt], False) - last = charset.from_splittable(splittable[splitpnt:], False) - else: - # Binary search for split point - first, last = _binsplit(splittable, charset, maxlinelen) - # first is of the proper length so just wrap it in the appropriate - # chrome. last must be recursively split. - fsplittable = charset.to_splittable(first) - fencoded = charset.from_splittable(fsplittable, True) - chunk = [(fencoded, charset)] - return chunk + self._split(last, charset, self._maxlinelen, splitchars) - - def _split_ascii(self, s, charset, firstlen, splitchars): - chunks = _split_ascii(s, firstlen, self._maxlinelen, - self._continuation_ws, splitchars) - return zip(chunks, [charset]*len(chunks)) - - def _encode_chunks(self, newchunks, maxlinelen): - # MIME-encode a header with many different charsets and/or encodings. - # - # Given a list of pairs (string, charset), return a MIME-encoded - # string suitable for use in a header field. Each pair may have - # different charsets and/or encodings, and the resulting header will - # accurately reflect each setting. - # - # Each encoding can be email.utils.QP (quoted-printable, for - # ASCII-like character sets like iso-8859-1), email.utils.BASE64 - # (Base64, for non-ASCII like character sets like KOI8-R and - # iso-2022-jp), or None (no encoding). 
- # - # Each pair will be represented on a separate line; the resulting - # string will be in the format: - # - # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n - # =?charset2?b?SvxyZ2VuIEL2aW5n?=" - chunks = [] - for header, charset in newchunks: - if not header: - continue - if charset is None or charset.header_encoding is None: - s = header - else: - s = charset.header_encode(header) - # Don't add more folding whitespace than necessary - if chunks and chunks[-1].endswith(' '): - extra = '' - else: - extra = ' ' - _max_append(chunks, s, maxlinelen, extra) - joiner = NL + self._continuation_ws - return joiner.join(chunks) - - def encode(self, splitchars=';, '): - """Encode a message header into an RFC-compliant format. - - There are many issues involved in converting a given string for use in - an email header. Only certain character sets are readable in most - email clients, and as header strings can only contain a subset of - 7-bit ASCII, care must be taken to properly convert and encode (with - Base64 or quoted-printable) header strings. In addition, there is a - 75-character length limit on any given encoded header field, so - line-wrapping must be performed, even with double-byte character sets. - - This method will do its best to convert the string to the correct - character set used in email, and encode and line wrap it safely with - the appropriate scheme for that character set. - - If the given charset is not known or an error occurs during - conversion, this function will return the header untouched. - - Optional splitchars is a string containing characters to split long - ASCII lines on, in rough support of RFC 2822's `highest level - syntactic breaks'. This doesn't affect RFC 2047 encoded lines. - """ - newchunks = [] - maxlinelen = self._firstlinelen - lastlen = 0 - for s, charset in self._chunks: - # The first bit of the next chunk should be just long enough to - # fill the next line. Don't forget the space separating the - # encoded words. 
- targetlen = maxlinelen - lastlen - 1 - if targetlen < charset.encoded_header_len(''): - # Stick it on the next line - targetlen = maxlinelen - newchunks += self._split(s, charset, targetlen, splitchars) - lastchunk, lastcharset = newchunks[-1] - lastlen = lastcharset.encoded_header_len(lastchunk) - value = self._encode_chunks(newchunks, maxlinelen) - if _embeded_header.search(value): - raise HeaderParseError("header value appears to contain " - "an embedded header: {!r}".format(value)) - return value - - - -def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars): - lines = [] - maxlen = firstlen - for line in s.splitlines(): - # Ignore any leading whitespace (i.e. continuation whitespace) already - # on the line, since we'll be adding our own. - line = line.lstrip() - if len(line) < maxlen: - lines.append(line) - maxlen = restlen - continue - # Attempt to split the line at the highest-level syntactic break - # possible. Note that we don't have a lot of smarts about field - # syntax; we just try to break on semi-colons, then commas, then - # whitespace. - for ch in splitchars: - if ch in line: - break - else: - # There's nothing useful to split the line on, not even spaces, so - # just append this line unchanged - lines.append(line) - maxlen = restlen - continue - # Now split the line on the character plus trailing whitespace - cre = re.compile(r'%s\s*' % ch) - if ch in ';,': - eol = ch - else: - eol = '' - joiner = eol + ' ' - joinlen = len(joiner) - wslen = len(continuation_ws.replace('\t', SPACE8)) - this = [] - linelen = 0 - for part in cre.split(line): - curlen = linelen + max(0, len(this)-1) * joinlen - partlen = len(part) - onfirstline = not lines - # We don't want to split after the field name, if we're on the - # first line and the field name is present in the header string. 
- if ch == ' ' and onfirstline and \ - len(this) == 1 and fcre.match(this[0]): - this.append(part) - linelen += partlen - elif curlen + partlen > maxlen: - if this: - lines.append(joiner.join(this) + eol) - # If this part is longer than maxlen and we aren't already - # splitting on whitespace, try to recursively split this line - # on whitespace. - if partlen > maxlen and ch != ' ': - subl = _split_ascii(part, maxlen, restlen, - continuation_ws, ' ') - lines.extend(subl[:-1]) - this = [subl[-1]] - else: - this = [part] - linelen = wslen + len(this[-1]) - maxlen = restlen - else: - this.append(part) - linelen += partlen - # Put any left over parts on a line by themselves - if this: - lines.append(joiner.join(this)) - return lines - - - -def _binsplit(splittable, charset, maxlinelen): - i = 0 - j = len(splittable) - while i < j: - # Invariants: - # 1. splittable[:k] fits for all k <= i (note that we *assume*, - # at the start, that splittable[:0] fits). - # 2. splittable[:k] does not fit for any k > j (at the start, - # this means we shouldn't look at any k > len(splittable)). - # 3. We don't know about splittable[:k] for k in i+1..j. - # 4. We want to set i to the largest k that fits, with i <= k <= j. - # - m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j - chunk = charset.from_splittable(splittable[:m], True) - chunklen = charset.encoded_header_len(chunk) - if chunklen <= maxlinelen: - # m is acceptable, so is a new lower bound. - i = m - else: - # m is not acceptable, so final i must be < m. - j = m - 1 - # i == j. Invariant #1 implies that splittable[:i] fits, and - # invariant #2 implies that splittable[:i+1] does not fit, so i - # is what we're looking for. 
- first = charset.from_splittable(splittable[:i], False) - last = charset.from_splittable(splittable[i:], False) - return first, last diff --git a/python/Lib/email/iterators.py b/python/Lib/email/iterators.py deleted file mode 100755 index e99f2280da..0000000000 --- a/python/Lib/email/iterators.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Various types of useful iterators and generators.""" - -__all__ = [ - 'body_line_iterator', - 'typed_subpart_iterator', - 'walk', - # Do not include _structure() since it's part of the debugging API. - ] - -import sys -from cStringIO import StringIO - - - -# This function will become a method of the Message class -def walk(self): - """Walk over the message tree, yielding each subpart. - - The walk is performed in depth-first order. This method is a - generator. - """ - yield self - if self.is_multipart(): - for subpart in self.get_payload(): - for subsubpart in subpart.walk(): - yield subsubpart - - - -# These two functions are imported into the Iterators.py interface module. -def body_line_iterator(msg, decode=False): - """Iterate over the parts, returning string payloads line-by-line. - - Optional decode (default False) is passed through to .get_payload(). - """ - for subpart in msg.walk(): - payload = subpart.get_payload(decode=decode) - if isinstance(payload, basestring): - for line in StringIO(payload): - yield line - - -def typed_subpart_iterator(msg, maintype='text', subtype=None): - """Iterate over the subparts with a given MIME type. - - Use `maintype' as the main MIME type to match against; this defaults to - "text". Optional `subtype' is the MIME subtype to match against; if - omitted, only the main type is matched. 
- """ - for subpart in msg.walk(): - if subpart.get_content_maintype() == maintype: - if subtype is None or subpart.get_content_subtype() == subtype: - yield subpart - - - -def _structure(msg, fp=None, level=0, include_default=False): - """A handy debugging aid""" - if fp is None: - fp = sys.stdout - tab = ' ' * (level * 4) - print >> fp, tab + msg.get_content_type(), - if include_default: - print >> fp, '[%s]' % msg.get_default_type() - else: - print >> fp - if msg.is_multipart(): - for subpart in msg.get_payload(): - _structure(subpart, fp, level+1, include_default) diff --git a/python/Lib/email/message.py b/python/Lib/email/message.py deleted file mode 100755 index d7358cd640..0000000000 --- a/python/Lib/email/message.py +++ /dev/null @@ -1,797 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Basic message object for the email package object model.""" - -__all__ = ['Message'] - -import re -import uu -import binascii -import warnings -from cStringIO import StringIO - -# Intrapackage imports -import email.charset -from email import utils -from email import errors - -SEMISPACE = '; ' - -# Regular expression that matches `special' characters in parameters, the -# existence of which force quoting of the parameter value. -tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') - - -# Helper functions -def _splitparam(param): - # Split header parameters. BAW: this may be too simple. It isn't - # strictly RFC 2045 (section 5.1) compliant, but it catches most headers - # found in the wild. We may eventually need a full fledged parser - # eventually. - a, sep, b = param.partition(';') - if not sep: - return a.strip(), None - return a.strip(), b.strip() - -def _formatparam(param, value=None, quote=True): - """Convenience function to format and return a key=value pair. - - This will quote the value if needed or if quote is true. 
If value is a - three tuple (charset, language, value), it will be encoded according - to RFC2231 rules. - """ - if value is not None and len(value) > 0: - # A tuple is used for RFC 2231 encoded parameter values where items - # are (charset, language, value). charset is a string, not a Charset - # instance. - if isinstance(value, tuple): - # Encode as per RFC 2231 - param += '*' - value = utils.encode_rfc2231(value[2], value[0], value[1]) - # BAW: Please check this. I think that if quote is set it should - # force quoting even if not necessary. - if quote or tspecials.search(value): - return '%s="%s"' % (param, utils.quote(value)) - else: - return '%s=%s' % (param, value) - else: - return param - -def _parseparam(s): - plist = [] - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - if '=' in f: - i = f.index('=') - f = f[:i].strip().lower() + '=' + f[i+1:].strip() - plist.append(f.strip()) - s = s[end:] - return plist - - -def _unquotevalue(value): - # This is different than utils.collapse_rfc2231_value() because it doesn't - # try to convert the value to a unicode. Message.get_param() and - # Message.get_params() are both currently defined to return the tuple in - # the face of RFC 2231 parameters. - if isinstance(value, tuple): - return value[0], value[1], utils.unquote(value[2]) - else: - return utils.unquote(value) - - - -class Message: - """Basic message object. - - A message object is defined as something that has a bunch of RFC 2822 - headers and a payload. It may optionally have an envelope header - (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a - multipart or a message/rfc822), then the payload is a list of Message - objects, otherwise it is a string. 
- - Message objects implement part of the `mapping' interface, which assumes - there is exactly one occurrence of the header per message. Some headers - do in fact appear multiple times (e.g. Received) and for those headers, - you must use the explicit API to set or get all the headers. Not all of - the mapping methods are implemented. - """ - def __init__(self): - self._headers = [] - self._unixfrom = None - self._payload = None - self._charset = None - # Defaults for multipart messages - self.preamble = self.epilogue = None - self.defects = [] - # Default content type - self._default_type = 'text/plain' - - def __str__(self): - """Return the entire formatted message as a string. - This includes the headers, body, and envelope header. - """ - return self.as_string(unixfrom=True) - - def as_string(self, unixfrom=False): - """Return the entire formatted message as a string. - Optional `unixfrom' when True, means include the Unix From_ envelope - header. - - This is a convenience method and may not generate the message exactly - as you intend because by default it mangles lines that begin with - "From ". For more flexibility, use the flatten() method of a - Generator instance. - """ - from email.generator import Generator - fp = StringIO() - g = Generator(fp) - g.flatten(self, unixfrom=unixfrom) - return fp.getvalue() - - def is_multipart(self): - """Return True if the message consists of multiple parts.""" - return isinstance(self._payload, list) - - # - # Unix From_ line - # - def set_unixfrom(self, unixfrom): - self._unixfrom = unixfrom - - def get_unixfrom(self): - return self._unixfrom - - # - # Payload manipulation. - # - def attach(self, payload): - """Add the given payload to the current payload. - - The current payload will always be a list of objects after this method - is called. If you want to set the payload to a scalar object, use - set_payload() instead. 
- """ - if self._payload is None: - self._payload = [payload] - else: - self._payload.append(payload) - - def get_payload(self, i=None, decode=False): - """Return a reference to the payload. - - The payload will either be a list object or a string. If you mutate - the list object, you modify the message's payload in place. Optional - i returns that index into the payload. - - Optional decode is a flag indicating whether the payload should be - decoded or not, according to the Content-Transfer-Encoding header - (default is False). - - When True and the message is not a multipart, the payload will be - decoded if this header's value is `quoted-printable' or `base64'. If - some other encoding is used, or the header is missing, or if the - payload has bogus data (i.e. bogus base64 or uuencoded data), the - payload is returned as-is. - - If the message is a multipart and the decode flag is True, then None - is returned. - """ - if i is None: - payload = self._payload - elif not isinstance(self._payload, list): - raise TypeError('Expected list, got %s' % type(self._payload)) - else: - payload = self._payload[i] - if decode: - if self.is_multipart(): - return None - cte = self.get('content-transfer-encoding', '').lower() - if cte == 'quoted-printable': - return utils._qdecode(payload) - elif cte == 'base64': - try: - return utils._bdecode(payload) - except binascii.Error: - # Incorrect padding - return payload - elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): - sfp = StringIO() - try: - uu.decode(StringIO(payload+'\n'), sfp, quiet=True) - payload = sfp.getvalue() - except uu.Error: - # Some decoding problem - return payload - # Everything else, including encodings with 8bit or 7bit are returned - # unchanged. - return payload - - def set_payload(self, payload, charset=None): - """Set the payload to the given value. - - Optional charset sets the message's default character set. See - set_charset() for details. 
- """ - self._payload = payload - if charset is not None: - self.set_charset(charset) - - def set_charset(self, charset): - """Set the charset of the payload to a given character set. - - charset can be a Charset instance, a string naming a character set, or - None. If it is a string it will be converted to a Charset instance. - If charset is None, the charset parameter will be removed from the - Content-Type field. Anything else will generate a TypeError. - - The message will be assumed to be of type text/* encoded with - charset.input_charset. It will be converted to charset.output_charset - and encoded properly, if needed, when generating the plain text - representation of the message. MIME headers (MIME-Version, - Content-Type, Content-Transfer-Encoding) will be added as needed. - - """ - if charset is None: - self.del_param('charset') - self._charset = None - return - if isinstance(charset, basestring): - charset = email.charset.Charset(charset) - if not isinstance(charset, email.charset.Charset): - raise TypeError(charset) - # BAW: should we accept strings that can serve as arguments to the - # Charset constructor? - self._charset = charset - if 'MIME-Version' not in self: - self.add_header('MIME-Version', '1.0') - if 'Content-Type' not in self: - self.add_header('Content-Type', 'text/plain', - charset=charset.get_output_charset()) - else: - self.set_param('charset', charset.get_output_charset()) - if isinstance(self._payload, unicode): - self._payload = self._payload.encode(charset.output_charset) - if str(charset) != charset.get_output_charset(): - self._payload = charset.body_encode(self._payload) - if 'Content-Transfer-Encoding' not in self: - cte = charset.get_body_encoding() - try: - cte(self) - except TypeError: - self._payload = charset.body_encode(self._payload) - self.add_header('Content-Transfer-Encoding', cte) - - def get_charset(self): - """Return the Charset instance associated with the message's payload. 
- """ - return self._charset - - # - # MAPPING INTERFACE (partial) - # - def __len__(self): - """Return the total number of headers, including duplicates.""" - return len(self._headers) - - def __getitem__(self, name): - """Get a header value. - - Return None if the header is missing instead of raising an exception. - - Note that if the header appeared multiple times, exactly which - occurrence gets returned is undefined. Use get_all() to get all - the values matching a header field name. - """ - return self.get(name) - - def __setitem__(self, name, val): - """Set the value of a header. - - Note: this does not overwrite an existing header with the same field - name. Use __delitem__() first to delete any existing headers. - """ - self._headers.append((name, val)) - - def __delitem__(self, name): - """Delete all occurrences of a header, if present. - - Does not raise an exception if the header is missing. - """ - name = name.lower() - newheaders = [] - for k, v in self._headers: - if k.lower() != name: - newheaders.append((k, v)) - self._headers = newheaders - - def __contains__(self, name): - return name.lower() in [k.lower() for k, v in self._headers] - - def has_key(self, name): - """Return true if the message contains the header.""" - missing = object() - return self.get(name, missing) is not missing - - def keys(self): - """Return a list of all the message's header field names. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [k for k, v in self._headers] - - def values(self): - """Return a list of all the message's header values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. 
- """ - return [v for k, v in self._headers] - - def items(self): - """Get all the message's header fields and values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return self._headers[:] - - def get(self, name, failobj=None): - """Get a header value. - - Like __getitem__() but return failobj instead of None when the field - is missing. - """ - name = name.lower() - for k, v in self._headers: - if k.lower() == name: - return v - return failobj - - # - # Additional useful stuff - # - - def get_all(self, name, failobj=None): - """Return a list of all the values for the named field. - - These will be sorted in the order they appeared in the original - message, and may contain duplicates. Any fields deleted and - re-inserted are always appended to the header list. - - If no such fields exist, failobj is returned (defaults to None). - """ - values = [] - name = name.lower() - for k, v in self._headers: - if k.lower() == name: - values.append(v) - if not values: - return failobj - return values - - def add_header(self, _name, _value, **_params): - """Extended header setting. - - name is the header field to add. keyword arguments can be used to set - additional parameters for the header field, with underscores converted - to dashes. Normally the parameter will be added as key="value" unless - value is None, in which case only the key will be added. If a - parameter value contains non-ASCII characters it must be specified as a - three-tuple of (charset, language, value), in which case it will be - encoded according to RFC2231 rules. 
- - Example: - - msg.add_header('content-disposition', 'attachment', filename='bud.gif') - """ - parts = [] - for k, v in _params.items(): - if v is None: - parts.append(k.replace('_', '-')) - else: - parts.append(_formatparam(k.replace('_', '-'), v)) - if _value is not None: - parts.insert(0, _value) - self._headers.append((_name, SEMISPACE.join(parts))) - - def replace_header(self, _name, _value): - """Replace a header. - - Replace the first matching header found in the message, retaining - header order and case. If no matching header was found, a KeyError is - raised. - """ - _name = _name.lower() - for i, (k, v) in zip(range(len(self._headers)), self._headers): - if k.lower() == _name: - self._headers[i] = (k, _value) - break - else: - raise KeyError(_name) - - # - # Use these three methods instead of the three above. - # - - def get_content_type(self): - """Return the message's content type. - - The returned string is coerced to lower case of the form - `maintype/subtype'. If there was no Content-Type header in the - message, the default type as given by get_default_type() will be - returned. Since according to RFC 2045, messages always have a default - type this will always return a value. - - RFC 2045 defines a message's default type to be text/plain unless it - appears inside a multipart/digest container, in which case it would be - message/rfc822. - """ - missing = object() - value = self.get('content-type', missing) - if value is missing: - # This should have no parameters - return self.get_default_type() - ctype = _splitparam(value)[0].lower() - # RFC 2045, section 5.2 says if its invalid, use text/plain - if ctype.count('/') != 1: - return 'text/plain' - return ctype - - def get_content_maintype(self): - """Return the message's main content type. - - This is the `maintype' part of the string returned by - get_content_type(). 
- """ - ctype = self.get_content_type() - return ctype.split('/')[0] - - def get_content_subtype(self): - """Returns the message's sub-content type. - - This is the `subtype' part of the string returned by - get_content_type(). - """ - ctype = self.get_content_type() - return ctype.split('/')[1] - - def get_default_type(self): - """Return the `default' content type. - - Most messages have a default content type of text/plain, except for - messages that are subparts of multipart/digest containers. Such - subparts have a default content type of message/rfc822. - """ - return self._default_type - - def set_default_type(self, ctype): - """Set the `default' content type. - - ctype should be either "text/plain" or "message/rfc822", although this - is not enforced. The default content type is not stored in the - Content-Type header. - """ - self._default_type = ctype - - def _get_params_preserve(self, failobj, header): - # Like get_params() but preserves the quoting of values. BAW: - # should this be part of the public interface? - missing = object() - value = self.get(header, missing) - if value is missing: - return failobj - params = [] - for p in _parseparam(';' + value): - try: - name, val = p.split('=', 1) - name = name.strip() - val = val.strip() - except ValueError: - # Must have been a bare attribute - name = p.strip() - val = '' - params.append((name, val)) - params = utils.decode_params(params) - return params - - def get_params(self, failobj=None, header='content-type', unquote=True): - """Return the message's Content-Type parameters, as a list. - - The elements of the returned list are 2-tuples of key/value pairs, as - split on the `=' sign. The left hand side of the `=' is the key, - while the right hand side is the value. If there is no `=' sign in - the parameter the value is the empty string. The value is as - described in the get_param() method. - - Optional failobj is the object to return if there is no Content-Type - header. 
Optional header is the header to search instead of - Content-Type. If unquote is True, the value is unquoted. - """ - missing = object() - params = self._get_params_preserve(missing, header) - if params is missing: - return failobj - if unquote: - return [(k, _unquotevalue(v)) for k, v in params] - else: - return params - - def get_param(self, param, failobj=None, header='content-type', - unquote=True): - """Return the parameter value if found in the Content-Type header. - - Optional failobj is the object to return if there is no Content-Type - header, or the Content-Type header has no such parameter. Optional - header is the header to search instead of Content-Type. - - Parameter keys are always compared case insensitively. The return - value can either be a string, or a 3-tuple if the parameter was RFC - 2231 encoded. When it's a 3-tuple, the elements of the value are of - the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and - LANGUAGE can be None, in which case you should consider VALUE to be - encoded in the us-ascii charset. You can usually ignore LANGUAGE. - - Your application should be prepared to deal with 3-tuple return - values, and can convert the parameter to a Unicode string like so: - - param = msg.get_param('foo') - if isinstance(param, tuple): - param = unicode(param[2], param[0] or 'us-ascii') - - In any case, the parameter value (either the returned string, or the - VALUE item in the 3-tuple) is always unquoted, unless unquote is set - to False. - """ - if header not in self: - return failobj - for k, v in self._get_params_preserve(failobj, header): - if k.lower() == param.lower(): - if unquote: - return _unquotevalue(v) - else: - return v - return failobj - - def set_param(self, param, value, header='Content-Type', requote=True, - charset=None, language=''): - """Set a parameter in the Content-Type header. - - If the parameter already exists in the header, its value will be - replaced with the new value. 
- - If header is Content-Type and has not yet been defined for this - message, it will be set to "text/plain" and the new parameter and - value will be appended as per RFC 2045. - - An alternate header can be specified in the header argument, and all - parameters will be quoted as necessary unless requote is False. - - If charset is specified, the parameter will be encoded according to RFC - 2231. Optional language specifies the RFC 2231 language, defaulting - to the empty string. Both charset and language should be strings. - """ - if not isinstance(value, tuple) and charset: - value = (charset, language, value) - - if header not in self and header.lower() == 'content-type': - ctype = 'text/plain' - else: - ctype = self.get(header) - if not self.get_param(param, header=header): - if not ctype: - ctype = _formatparam(param, value, requote) - else: - ctype = SEMISPACE.join( - [ctype, _formatparam(param, value, requote)]) - else: - ctype = '' - for old_param, old_value in self.get_params(header=header, - unquote=requote): - append_param = '' - if old_param.lower() == param.lower(): - append_param = _formatparam(param, value, requote) - else: - append_param = _formatparam(old_param, old_value, requote) - if not ctype: - ctype = append_param - else: - ctype = SEMISPACE.join([ctype, append_param]) - if ctype != self.get(header): - del self[header] - self[header] = ctype - - def del_param(self, param, header='content-type', requote=True): - """Remove the given parameter completely from the Content-Type header. - - The header will be re-written in place without the parameter or its - value. All values will be quoted as necessary unless requote is - False. Optional header specifies an alternative to the Content-Type - header. 
- """ - if header not in self: - return - new_ctype = '' - for p, v in self.get_params(header=header, unquote=requote): - if p.lower() != param.lower(): - if not new_ctype: - new_ctype = _formatparam(p, v, requote) - else: - new_ctype = SEMISPACE.join([new_ctype, - _formatparam(p, v, requote)]) - if new_ctype != self.get(header): - del self[header] - self[header] = new_ctype - - def set_type(self, type, header='Content-Type', requote=True): - """Set the main type and subtype for the Content-Type header. - - type must be a string in the form "maintype/subtype", otherwise a - ValueError is raised. - - This method replaces the Content-Type header, keeping all the - parameters in place. If requote is False, this leaves the existing - header's quoting as is. Otherwise, the parameters will be quoted (the - default). - - An alternative header can be specified in the header argument. When - the Content-Type header is set, we'll always also add a MIME-Version - header. - """ - # BAW: should we be strict? - if not type.count('/') == 1: - raise ValueError - # Set the Content-Type, you get a MIME-Version - if header.lower() == 'content-type': - del self['mime-version'] - self['MIME-Version'] = '1.0' - if header not in self: - self[header] = type - return - params = self.get_params(header=header, unquote=requote) - del self[header] - self[header] = type - # Skip the first param; it's the old type. - for p, v in params[1:]: - self.set_param(p, v, header, requote) - - def get_filename(self, failobj=None): - """Return the filename associated with the payload if present. - - The filename is extracted from the Content-Disposition header's - `filename' parameter, and it is unquoted. If that header is missing - the `filename' parameter, this method falls back to looking for the - `name' parameter. 
- """ - missing = object() - filename = self.get_param('filename', missing, 'content-disposition') - if filename is missing: - filename = self.get_param('name', missing, 'content-type') - if filename is missing: - return failobj - return utils.collapse_rfc2231_value(filename).strip() - - def get_boundary(self, failobj=None): - """Return the boundary associated with the payload if present. - - The boundary is extracted from the Content-Type header's `boundary' - parameter, and it is unquoted. - """ - missing = object() - boundary = self.get_param('boundary', missing) - if boundary is missing: - return failobj - # RFC 2046 says that boundaries may begin but not end in w/s - return utils.collapse_rfc2231_value(boundary).rstrip() - - def set_boundary(self, boundary): - """Set the boundary parameter in Content-Type to 'boundary'. - - This is subtly different than deleting the Content-Type header and - adding a new one with a new boundary parameter via add_header(). The - main difference is that using the set_boundary() method preserves the - order of the Content-Type header in the original message. - - HeaderParseError is raised if the message has no Content-Type header. - """ - missing = object() - params = self._get_params_preserve(missing, 'content-type') - if params is missing: - # There was no Content-Type header, and we don't know what type - # to set it to, so raise an exception. - raise errors.HeaderParseError('No Content-Type header found') - newparams = [] - foundp = False - for pk, pv in params: - if pk.lower() == 'boundary': - newparams.append(('boundary', '"%s"' % boundary)) - foundp = True - else: - newparams.append((pk, pv)) - if not foundp: - # The original Content-Type header had no boundary attribute. - # Tack one on the end. BAW: should we raise an exception - # instead??? 
- newparams.append(('boundary', '"%s"' % boundary)) - # Replace the existing Content-Type header with the new value - newheaders = [] - for h, v in self._headers: - if h.lower() == 'content-type': - parts = [] - for k, v in newparams: - if v == '': - parts.append(k) - else: - parts.append('%s=%s' % (k, v)) - newheaders.append((h, SEMISPACE.join(parts))) - - else: - newheaders.append((h, v)) - self._headers = newheaders - - def get_content_charset(self, failobj=None): - """Return the charset parameter of the Content-Type header. - - The returned string is always coerced to lower case. If there is no - Content-Type header, or if that header has no charset parameter, - failobj is returned. - """ - missing = object() - charset = self.get_param('charset', missing) - if charset is missing: - return failobj - if isinstance(charset, tuple): - # RFC 2231 encoded, so decode it, and it better end up as ascii. - pcharset = charset[0] or 'us-ascii' - try: - # LookupError will be raised if the charset isn't known to - # Python. UnicodeError will be raised if the encoded text - # contains a character not in the charset. - charset = unicode(charset[2], pcharset).encode('us-ascii') - except (LookupError, UnicodeError): - charset = charset[2] - # charset character must be in us-ascii range - try: - if isinstance(charset, str): - charset = unicode(charset, 'us-ascii') - charset = charset.encode('us-ascii') - except UnicodeError: - return failobj - # RFC 2046, $4.1.2 says charsets are not case sensitive - return charset.lower() - - def get_charsets(self, failobj=None): - """Return a list containing the charset(s) used in this message. - - The returned list of items describes the Content-Type headers' - charset parameter for this message and all the subparts in its - payload. 
- - Each item will either be a string (the value of the charset parameter - in the Content-Type header of that part) or the value of the - 'failobj' parameter (defaults to None), if the part does not have a - main MIME type of "text", or the charset is not defined. - - The list will contain one string for each part of the message, plus - one for the container message (i.e. self), so that a non-multipart - message will still return a list of length 1. - """ - return [part.get_content_charset(failobj) for part in self.walk()] - - # I.e. def walk(self): ... - from email.iterators import walk diff --git a/python/Lib/email/mime/__init__.py b/python/Lib/email/mime/__init__.py deleted file mode 100755 index e69de29bb2..0000000000 diff --git a/python/Lib/email/mime/application.py b/python/Lib/email/mime/application.py deleted file mode 100755 index f5c5905564..0000000000 --- a/python/Lib/email/mime/application.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Keith Dart -# Contact: email-sig@python.org - -"""Class representing application/* type MIME documents.""" - -__all__ = ["MIMEApplication"] - -from email import encoders -from email.mime.nonmultipart import MIMENonMultipart - - -class MIMEApplication(MIMENonMultipart): - """Class for generating application/* MIME documents.""" - - def __init__(self, _data, _subtype='octet-stream', - _encoder=encoders.encode_base64, **_params): - """Create an application/* type MIME document. - - _data is a string containing the raw application data. - - _subtype is the MIME content type subtype, defaulting to - 'octet-stream'. - - _encoder is a function which will perform the actual encoding for - transport of the application data, defaulting to base64 encoding. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. 
- """ - if _subtype is None: - raise TypeError('Invalid application MIME subtype') - MIMENonMultipart.__init__(self, 'application', _subtype, **_params) - self.set_payload(_data) - _encoder(self) diff --git a/python/Lib/email/mime/audio.py b/python/Lib/email/mime/audio.py deleted file mode 100755 index c7290c4b1c..0000000000 --- a/python/Lib/email/mime/audio.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Anthony Baxter -# Contact: email-sig@python.org - -"""Class representing audio/* type MIME documents.""" - -__all__ = ['MIMEAudio'] - -import sndhdr - -from cStringIO import StringIO -from email import encoders -from email.mime.nonmultipart import MIMENonMultipart - - - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. - """ - hdr = data[:512] - fakefile = StringIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - - -class MIMEAudio(MIMENonMultipart): - """Class for generating audio/* MIME documents.""" - - def __init__(self, _audiodata, _subtype=None, - _encoder=encoders.encode_base64, **_params): - """Create an audio/* type MIME document. - - _audiodata is a string containing the raw audio data. If this data - can be decoded by the standard Python `sndhdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific audio subtype via the - _subtype parameter. 
If _subtype is not given, and no subtype can be - guessed, a TypeError is raised. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = _whatsnd(_audiodata) - if _subtype is None: - raise TypeError('Could not find audio MIME subtype') - MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) - self.set_payload(_audiodata) - _encoder(self) diff --git a/python/Lib/email/mime/base.py b/python/Lib/email/mime/base.py deleted file mode 100755 index ac919258b1..0000000000 --- a/python/Lib/email/mime/base.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME specializations.""" - -__all__ = ['MIMEBase'] - -from email import message - - - -class MIMEBase(message.Message): - """Base class for MIME specializations.""" - - def __init__(self, _maintype, _subtype, **_params): - """This constructor adds a Content-Type: and a MIME-Version: header. - - The Content-Type: header is taken from the _maintype and _subtype - arguments. Additional parameters for this header are taken from the - keyword arguments. 
- """ - message.Message.__init__(self) - ctype = '%s/%s' % (_maintype, _subtype) - self.add_header('Content-Type', ctype, **_params) - self['MIME-Version'] = '1.0' diff --git a/python/Lib/email/mime/image.py b/python/Lib/email/mime/image.py deleted file mode 100755 index 5563823239..0000000000 --- a/python/Lib/email/mime/image.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing image/* type MIME documents.""" - -__all__ = ['MIMEImage'] - -import imghdr - -from email import encoders -from email.mime.nonmultipart import MIMENonMultipart - - - -class MIMEImage(MIMENonMultipart): - """Class for generating image/* type MIME documents.""" - - def __init__(self, _imagedata, _subtype=None, - _encoder=encoders.encode_base64, **_params): - """Create an image/* type MIME document. - - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. 
- """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) - if _subtype is None: - raise TypeError('Could not guess image MIME subtype') - MIMENonMultipart.__init__(self, 'image', _subtype, **_params) - self.set_payload(_imagedata) - _encoder(self) diff --git a/python/Lib/email/mime/message.py b/python/Lib/email/mime/message.py deleted file mode 100755 index 275dbfd088..0000000000 --- a/python/Lib/email/mime/message.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing message/* MIME documents.""" - -__all__ = ['MIMEMessage'] - -from email import message -from email.mime.nonmultipart import MIMENonMultipart - - - -class MIMEMessage(MIMENonMultipart): - """Class representing message/* MIME documents.""" - - def __init__(self, _msg, _subtype='rfc822'): - """Create a message/* type MIME document. - - _msg is a message object and must be an instance of Message, or a - derived class of Message, otherwise a TypeError is raised. - - Optional _subtype defines the subtype of the contained message. The - default is "rfc822" (this is defined by the MIME standard, even though - the term "rfc822" is technically outdated by RFC 2822). - """ - MIMENonMultipart.__init__(self, 'message', _subtype) - if not isinstance(_msg, message.Message): - raise TypeError('Argument is not an instance of Message') - # It's convenient to use this base class method. 
We need to do it - # this way or we'll get an exception - message.Message.attach(self, _msg) - # And be sure our default type is set correctly - self.set_default_type('message/rfc822') diff --git a/python/Lib/email/mime/multipart.py b/python/Lib/email/mime/multipart.py deleted file mode 100755 index 96618650c5..0000000000 --- a/python/Lib/email/mime/multipart.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME multipart/* type messages.""" - -__all__ = ['MIMEMultipart'] - -from email.mime.base import MIMEBase - - - -class MIMEMultipart(MIMEBase): - """Base class for MIME multipart/* type messages.""" - - def __init__(self, _subtype='mixed', boundary=None, _subparts=None, - **_params): - """Creates a multipart/* type message. - - By default, creates a multipart/mixed message, with proper - Content-Type and MIME-Version headers. - - _subtype is the subtype of the multipart content type, defaulting to - `mixed'. - - boundary is the multipart boundary string. By default it is - calculated as needed. - - _subparts is a sequence of initial subparts for the payload. It - must be an iterable object, such as a list. You can always - attach new subparts to the message by using the attach() method. - - Additional parameters for the Content-Type header are taken from the - keyword arguments (or passed into the _params argument). - """ - MIMEBase.__init__(self, 'multipart', _subtype, **_params) - - # Initialise _payload to an empty list as the Message superclass's - # implementation of is_multipart assumes that _payload is a list for - # multipart messages. 
- self._payload = [] - - if _subparts: - for p in _subparts: - self.attach(p) - if boundary: - self.set_boundary(boundary) diff --git a/python/Lib/email/mime/nonmultipart.py b/python/Lib/email/mime/nonmultipart.py deleted file mode 100755 index e1f51968b5..0000000000 --- a/python/Lib/email/mime/nonmultipart.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME type messages that are not multipart.""" - -__all__ = ['MIMENonMultipart'] - -from email import errors -from email.mime.base import MIMEBase - - - -class MIMENonMultipart(MIMEBase): - """Base class for MIME non-multipart type messages.""" - - def attach(self, payload): - # The public API prohibits attaching multiple subparts to MIMEBase - # derived subtypes since none of them are, by definition, of content - # type multipart/* - raise errors.MultipartConversionError( - 'Cannot attach additional subparts to non-multipart/*') diff --git a/python/Lib/email/mime/text.py b/python/Lib/email/mime/text.py deleted file mode 100755 index 5747db5d67..0000000000 --- a/python/Lib/email/mime/text.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing text/* type MIME documents.""" - -__all__ = ['MIMEText'] - -from email.encoders import encode_7or8bit -from email.mime.nonmultipart import MIMENonMultipart - - - -class MIMEText(MIMENonMultipart): - """Class for generating text/* type MIME documents.""" - - def __init__(self, _text, _subtype='plain', _charset='us-ascii'): - """Create a text/* type MIME document. - - _text is the string for this message object. - - _subtype is the MIME sub content type, defaulting to "plain". - - _charset is the character set parameter added to the Content-Type - header. This defaults to "us-ascii". 
Note that as a side-effect, the - Content-Transfer-Encoding header will also be set. - """ - MIMENonMultipart.__init__(self, 'text', _subtype, - **{'charset': _charset}) - self.set_payload(_text, _charset) diff --git a/python/Lib/email/parser.py b/python/Lib/email/parser.py deleted file mode 100755 index 6dad32a3fc..0000000000 --- a/python/Lib/email/parser.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter -# Contact: email-sig@python.org - -"""A parser of RFC 2822 and MIME email messages.""" - -__all__ = ['Parser', 'HeaderParser'] - -import warnings -from cStringIO import StringIO - -from email.feedparser import FeedParser -from email.message import Message - - - -class Parser: - def __init__(self, *args, **kws): - """Parser of RFC 2822 and MIME email messages. - - Creates an in-memory object tree representing the email message, which - can then be manipulated and turned over to a Generator to return the - textual representation of the message. - - The string must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceded by a `Unix-from' header. The - header block is terminated either by the end of the string or by a - blank line. - - _class is the class to instantiate for new message objects when they - must be created. This class must have a constructor that can take - zero arguments. Default is Message.Message. 
- """ - if len(args) >= 1: - if '_class' in kws: - raise TypeError("Multiple values for keyword arg '_class'") - kws['_class'] = args[0] - if len(args) == 2: - if 'strict' in kws: - raise TypeError("Multiple values for keyword arg 'strict'") - kws['strict'] = args[1] - if len(args) > 2: - raise TypeError('Too many arguments') - if '_class' in kws: - self._class = kws['_class'] - del kws['_class'] - else: - self._class = Message - if 'strict' in kws: - warnings.warn("'strict' argument is deprecated (and ignored)", - DeprecationWarning, 2) - del kws['strict'] - if kws: - raise TypeError('Unexpected keyword arguments') - - def parse(self, fp, headersonly=False): - """Create a message structure from the data in a file. - - Reads all the data from the file and returns the root of the message - structure. Optional headersonly is a flag specifying whether to stop - parsing after reading the headers or not. The default is False, - meaning it parses the entire contents of the file. - """ - feedparser = FeedParser(self._class) - if headersonly: - feedparser._set_headersonly() - while True: - data = fp.read(8192) - if not data: - break - feedparser.feed(data) - return feedparser.close() - - def parsestr(self, text, headersonly=False): - """Create a message structure from a string. - - Returns the root of the message structure. Optional headersonly is a - flag specifying whether to stop parsing after reading the headers or - not. The default is False, meaning it parses the entire contents of - the file. 
- """ - return self.parse(StringIO(text), headersonly=headersonly) - - - -class HeaderParser(Parser): - def parse(self, fp, headersonly=True): - return Parser.parse(self, fp, True) - - def parsestr(self, text, headersonly=True): - return Parser.parsestr(self, text, True) diff --git a/python/Lib/email/quoprimime.py b/python/Lib/email/quoprimime.py deleted file mode 100755 index 1056b45bcd..0000000000 --- a/python/Lib/email/quoprimime.py +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Ben Gertzfield -# Contact: email-sig@python.org - -"""Quoted-printable content transfer encoding per RFCs 2045-2047. - -This module handles the content transfer encoding method defined in RFC 2045 -to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to -safely encode text that is in a character set similar to the 7-bit US ASCII -character set, but that includes some 8-bit characters that are normally not -allowed in email bodies or headers. - -Quoted-printable is very space-inefficient for encoding binary files; use the -email.base64mime module for that instead. - -This module provides an interface to encode and decode both headers and bodies -with quoted-printable encoding. - -RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names -in To:/From:/Cc: etc. fields, as well as Subject: lines. - -This module does not do the line wrapping or end-of-line character -conversion necessary for proper internationalized headers; it only -does dumb encoding and decoding. To deal with the various line -wrapping issues, use the email.header module. 
-""" - -__all__ = [ - 'body_decode', - 'body_encode', - 'body_quopri_check', - 'body_quopri_len', - 'decode', - 'decodestring', - 'encode', - 'encodestring', - 'header_decode', - 'header_encode', - 'header_quopri_check', - 'header_quopri_len', - 'quote', - 'unquote', - ] - -import re - -from string import hexdigits -from email.utils import fix_eols - -CRLF = '\r\n' -NL = '\n' - -# See also Charset.py -MISC_LEN = 7 - -hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]') -bqre = re.compile(r'[^ !-<>-~\t]') - - - -# Helpers -def header_quopri_check(c): - """Return True if the character should be escaped with header quopri.""" - return bool(hqre.match(c)) - - -def body_quopri_check(c): - """Return True if the character should be escaped with body quopri.""" - return bool(bqre.match(c)) - - -def header_quopri_len(s): - """Return the length of str when it is encoded with header quopri.""" - count = 0 - for c in s: - if hqre.match(c): - count += 3 - else: - count += 1 - return count - - -def body_quopri_len(str): - """Return the length of str when it is encoded with body quopri.""" - count = 0 - for c in str: - if bqre.match(c): - count += 3 - else: - count += 1 - return count - - -def _max_append(L, s, maxlen, extra=''): - if not L: - L.append(s.lstrip()) - elif len(L[-1]) + len(s) <= maxlen: - L[-1] += extra + s - else: - L.append(s.lstrip()) - - -def unquote(s): - """Turn a string in the form =AB to the ASCII character with value 0xab""" - return chr(int(s[1:3], 16)) - - -def quote(c): - return "=%02X" % ord(c) - - - -def header_encode(header, charset="iso-8859-1", keep_eols=False, - maxlinelen=76, eol=NL): - """Encode a single header line with quoted-printable (like) encoding. - - Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but - used specifically for email header fields to allow charsets with mostly 7 - bit characters (and some 8 bit) to remain more or less readable in non-RFC - 2045 aware mail clients. 
- - charset names the character set to use to encode the header. It defaults - to iso-8859-1. - - The resulting string will be in the form: - - "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n - =?charset?q?Silly_=C8nglish_Kn=EEghts?=" - - with each line wrapped safely at, at most, maxlinelen characters (defaults - to 76 characters). If maxlinelen is None, the entire string is encoded in - one chunk with no splitting. - - End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted - to the canonical email line separator \\r\\n unless the keep_eols - parameter is True (the default is False). - - Each line of the header will be terminated in the value of eol, which - defaults to "\\n". Set this to "\\r\\n" if you are using the result of - this function directly in email. - """ - # Return empty headers unchanged - if not header: - return header - - if not keep_eols: - header = fix_eols(header) - - # Quopri encode each line, in encoded chunks no greater than maxlinelen in - # length, after the RFC chrome is added in. - quoted = [] - if maxlinelen is None: - # An obnoxiously large number that's good enough - max_encoded = 100000 - else: - max_encoded = maxlinelen - len(charset) - MISC_LEN - 1 - - for c in header: - # Space may be represented as _ instead of =20 for readability - if c == ' ': - _max_append(quoted, '_', max_encoded) - # These characters can be included verbatim - elif not hqre.match(c): - _max_append(quoted, c, max_encoded) - # Otherwise, replace with hex value like =E2 - else: - _max_append(quoted, "=%02X" % ord(c), max_encoded) - - # Now add the RFC chrome to each encoded chunk and glue the chunks - # together. BAW: should we be able to specify the leading whitespace in - # the joiner? - joiner = eol + ' ' - return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted]) - - - -def encode(body, binary=False, maxlinelen=76, eol=NL): - """Encode with quoted-printable, wrapping at maxlinelen characters. 
- - If binary is False (the default), end-of-line characters will be converted - to the canonical email end-of-line sequence \\r\\n. Otherwise they will - be left verbatim. - - Each line of encoded text will end with eol, which defaults to "\\n". Set - this to "\\r\\n" if you will be using the result of this function directly - in an email. - - Each line will be wrapped at, at most, maxlinelen characters (defaults to - 76 characters). Long lines will have the `soft linefeed' quoted-printable - character "=" appended to them, so the decoded text will be identical to - the original text. - """ - if not body: - return body - - if not binary: - body = fix_eols(body) - - # BAW: We're accumulating the body text by string concatenation. That - # can't be very efficient, but I don't have time now to rewrite it. It - # just feels like this algorithm could be more efficient. - encoded_body = '' - lineno = -1 - # Preserve line endings here so we can check later to see an eol needs to - # be added to the output later. - lines = body.splitlines(1) - for line in lines: - # But strip off line-endings for processing this line. - if line.endswith(CRLF): - line = line[:-2] - elif line[-1] in CRLF: - line = line[:-1] - - lineno += 1 - encoded_line = '' - prev = None - linelen = len(line) - # Now we need to examine every character to see if it needs to be - # quopri encoded. BAW: again, string concatenation is inefficient. - for j in range(linelen): - c = line[j] - prev = c - if bqre.match(c): - c = quote(c) - elif j+1 == linelen: - # Check for whitespace at end of line; special case - if c not in ' \t': - encoded_line += c - prev = c - continue - # Check to see to see if the line has reached its maximum length - if len(encoded_line) + len(c) >= maxlinelen: - encoded_body += encoded_line + '=' + eol - encoded_line = '' - encoded_line += c - # Now at end of line.. 
- if prev and prev in ' \t': - # Special case for whitespace at end of file - if lineno + 1 == len(lines): - prev = quote(prev) - if len(encoded_line) + len(prev) > maxlinelen: - encoded_body += encoded_line + '=' + eol + prev - else: - encoded_body += encoded_line + prev - # Just normal whitespace at end of line - else: - encoded_body += encoded_line + prev + '=' + eol - encoded_line = '' - # Now look at the line we just finished and it has a line ending, we - # need to add eol to the end of the line. - if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF: - encoded_body += encoded_line + eol - else: - encoded_body += encoded_line - encoded_line = '' - return encoded_body - - -# For convenience and backwards compatibility w/ standard base64 module -body_encode = encode -encodestring = encode - - - -# BAW: I'm not sure if the intent was for the signature of this function to be -# the same as base64MIME.decode() or not... -def decode(encoded, eol=NL): - """Decode a quoted-printable string. - - Lines are separated with eol, which defaults to \\n. - """ - if not encoded: - return encoded - # BAW: see comment in encode() above. Again, we're building up the - # decoded string with string concatenation, which could be done much more - # efficiently. - decoded = '' - - for line in encoded.splitlines(): - line = line.rstrip() - if not line: - decoded += eol - continue - - i = 0 - n = len(line) - while i < n: - c = line[i] - if c != '=': - decoded += c - i += 1 - # Otherwise, c == "=". Are we at the end of the line? If so, add - # a soft line break. 
- elif i+1 == n: - i += 1 - continue - # Decode if in form =AB - elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: - decoded += unquote(line[i:i+3]) - i += 3 - # Otherwise, not in form =AB, pass literally - else: - decoded += c - i += 1 - - if i == n: - decoded += eol - # Special case if original string did not end with eol - if not encoded.endswith(eol) and decoded.endswith(eol): - decoded = decoded[:-1] - return decoded - - -# For convenience and backwards compatibility w/ standard base64 module -body_decode = decode -decodestring = decode - - - -def _unquote_match(match): - """Turn a match in the form =AB to the ASCII character with value 0xab""" - s = match.group(0) - return unquote(s) - - -# Header decoding is done a bit differently -def header_decode(s): - """Decode a string encoded with RFC 2045 MIME header `Q' encoding. - - This function does not parse a full MIME header value encoded with - quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use - the high level email.header class for that functionality. 
- """ - s = s.replace('_', ' ') - return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s) diff --git a/python/Lib/email/utils.py b/python/Lib/email/utils.py deleted file mode 100755 index ac13f49d59..0000000000 --- a/python/Lib/email/utils.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright (C) 2001-2010 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Miscellaneous utilities.""" - -__all__ = [ - 'collapse_rfc2231_value', - 'decode_params', - 'decode_rfc2231', - 'encode_rfc2231', - 'formataddr', - 'formatdate', - 'getaddresses', - 'make_msgid', - 'mktime_tz', - 'parseaddr', - 'parsedate', - 'parsedate_tz', - 'unquote', - ] - -import os -import re -import time -import base64 -import random -import socket -import urllib -import warnings - -from email._parseaddr import quote -from email._parseaddr import AddressList as _AddressList -from email._parseaddr import mktime_tz - -# We need wormarounds for bugs in these methods in older Pythons (see below) -from email._parseaddr import parsedate as _parsedate -from email._parseaddr import parsedate_tz as _parsedate_tz - -from quopri import decodestring as _qdecode - -# Intrapackage imports -from email.encoders import _bencode, _qencode - -COMMASPACE = ', ' -EMPTYSTRING = '' -UEMPTYSTRING = u'' -CRLF = '\r\n' -TICK = "'" - -specialsre = re.compile(r'[][\\()<>@,:;".]') -escapesre = re.compile(r'[][\\()"]') - - - -# Helpers - -def _identity(s): - return s - - -def _bdecode(s): - """Decodes a base64 string. - - This function is equivalent to base64.decodestring and it's retained only - for backward compatibility. It used to remove the last \\n of the decoded - string, if it had any (see issue 7143). 
- """ - if not s: - return s - return base64.decodestring(s) - - - -def fix_eols(s): - """Replace all line-ending characters with \\r\\n.""" - # Fix newlines with no preceding carriage return - s = re.sub(r'(?', name) - return '%s%s%s <%s>' % (quotes, name, quotes, address) - return address - - - -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(fieldvalues) - a = _AddressList(all) - return a.addresslist - - - -ecre = re.compile(r''' - =\? # literal =? - (?P[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P.*?) # non-greedy up to the next ?= is the atom - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) - - - -def formatdate(timeval=None, localtime=False, usegmt=False): - """Returns a date string as specified by RFC 2822, e.g.: - - Fri, 09 Nov 2001 01:08:47 -0000 - - Optional timeval if given is a floating point time value as accepted by - gmtime() and localtime(), otherwise the current time is used. - - Optional localtime is a flag that when True, interprets timeval, and - returns a date relative to the local timezone instead of UTC, properly - taking daylight savings time into account. - - Optional argument usegmt means that the timezone is written out as - an ascii string, not numeric one (so "GMT" instead of "+0000"). This - is needed for HTTP, and is only used when localtime==False. - """ - # Note: we cannot use strftime() because that honors the locale and RFC - # 2822 requires that day and month names be the English abbreviations. - if timeval is None: - timeval = time.time() - if localtime: - now = time.localtime(timeval) - # Calculate timezone offset, based on whether the local zone has - # daylight savings time, and whether DST is in effect. 
- if time.daylight and now[-1]: - offset = time.altzone - else: - offset = time.timezone - hours, minutes = divmod(abs(offset), 3600) - # Remember offset is in seconds west of UTC, but the timezone is in - # minutes east of UTC, so the signs differ. - if offset > 0: - sign = '-' - else: - sign = '+' - zone = '%s%02d%02d' % (sign, hours, minutes // 60) - else: - now = time.gmtime(timeval) - # Timezone offset is always -0000 - if usegmt: - zone = 'GMT' - else: - zone = '-0000' - return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( - ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]], - now[2], - ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1], - now[0], now[3], now[4], now[5], - zone) - - - -def make_msgid(idstring=None): - """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: - - <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com> - - Optional idstring if given is a string used to strengthen the - uniqueness of the message id. - """ - timeval = int(time.time()*100) - pid = os.getpid() - randint = random.getrandbits(64) - if idstring is None: - idstring = '' - else: - idstring = '.' + idstring - idhost = socket.getfqdn() - msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, idhost) - return msgid - - - -# These functions are in the standalone mimelib version only because they've -# subsequently been fixed in the latest Python versions. We use this to worm -# around broken older Pythons. -def parsedate(data): - if not data: - return None - return _parsedate(data) - - -def parsedate_tz(data): - if not data: - return None - return _parsedate_tz(data) - - -def parseaddr(addr): - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' - return addrs[0] - - -# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. 
-def unquote(str): - """Remove quotes from a string.""" - if len(str) > 1: - if str.startswith('"') and str.endswith('"'): - return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') - if str.startswith('<') and str.endswith('>'): - return str[1:-1] - return str - - - -# RFC2231-related functions - parameter encoding and decoding -def decode_rfc2231(s): - """Decode string according to RFC 2231""" - parts = s.split(TICK, 2) - if len(parts) <= 2: - return None, None, s - return parts - - -def encode_rfc2231(s, charset=None, language=None): - """Encode string according to RFC 2231. - - If neither charset nor language is given, then s is returned as-is. If - charset is given but not language, the string is encoded using the empty - string for language. - """ - import urllib - s = urllib.quote(s, safe='') - if charset is None and language is None: - return s - if language is None: - language = '' - return "%s'%s'%s" % (charset, language, s) - - -rfc2231_continuation = re.compile(r'^(?P\w+)\*((?P[0-9]+)\*?)?$') - -def decode_params(params): - """Decode parameters list according to RFC 2231. - - params is a sequence of 2-tuples containing (param name, string value). - """ - # Copy params so we don't mess with the original - params = params[:] - new_params = [] - # Map parameter's name to a list of continuations. The values are a - # 3-tuple of the continuation number, the string value, and a flag - # specifying whether a particular segment is %-encoded. 
- rfc2231_params = {} - name, value = params.pop(0) - new_params.append((name, value)) - while params: - name, value = params.pop(0) - if name.endswith('*'): - encoded = True - else: - encoded = False - value = unquote(value) - mo = rfc2231_continuation.match(name) - if mo: - name, num = mo.group('name', 'num') - if num is not None: - num = int(num) - rfc2231_params.setdefault(name, []).append((num, value, encoded)) - else: - new_params.append((name, '"%s"' % quote(value))) - if rfc2231_params: - for name, continuations in rfc2231_params.items(): - value = [] - extended = False - # Sort by number - continuations.sort() - # And now append all values in numerical order, converting - # %-encodings for the encoded segments. If any of the - # continuation names ends in a *, then the entire string, after - # decoding segments and concatenating, must have the charset and - # language specifiers at the beginning of the string. - for num, s, encoded in continuations: - if encoded: - s = urllib.unquote(s) - extended = True - value.append(s) - value = quote(EMPTYSTRING.join(value)) - if extended: - charset, language, value = decode_rfc2231(value) - new_params.append((name, (charset, language, '"%s"' % value))) - else: - new_params.append((name, '"%s"' % value)) - return new_params - -def collapse_rfc2231_value(value, errors='replace', - fallback_charset='us-ascii'): - if isinstance(value, tuple): - rawval = unquote(value[2]) - charset = value[0] or 'us-ascii' - try: - return unicode(rawval, charset, errors) - except LookupError: - # XXX charset is unknown to Python. - return unicode(rawval, fallback_charset, errors) - else: - return unquote(value) diff --git a/python/Lib/encodings/__init__.py b/python/Lib/encodings/__init__.py deleted file mode 100755 index b85ca823ae..0000000000 --- a/python/Lib/encodings/__init__.py +++ /dev/null @@ -1,157 +0,0 @@ -""" Standard "encodings" Package - - Standard Python encoding modules are stored in this package - directory. 
- - Codec modules must have names corresponding to normalized encoding - names as defined in the normalize_encoding() function below, e.g. - 'utf-8' must be implemented by the module 'utf_8.py'. - - Each codec module must export the following interface: - - * getregentry() -> codecs.CodecInfo object - The getregentry() API must a CodecInfo object with encoder, decoder, - incrementalencoder, incrementaldecoder, streamwriter and streamreader - atttributes which adhere to the Python Codec Interface Standard. - - In addition, a module may optionally also define the following - APIs which are then used by the package's codec search function: - - * getaliases() -> sequence of encoding name strings to use as aliases - - Alias names returned by getaliases() must be normalized encoding - names as defined by normalize_encoding(). - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -"""#" - -import codecs -from encodings import aliases -import __builtin__ - -_cache = {} -_unknown = '--unknown--' -_import_tail = ['*'] -_norm_encoding_map = (' . ' - '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ ' - ' abcdefghijklmnopqrstuvwxyz ' - ' ' - ' ' - ' ') -_aliases = aliases.aliases - -class CodecRegistryError(LookupError, SystemError): - pass - -def normalize_encoding(encoding): - - """ Normalize an encoding name. - - Normalization works as follows: all non-alphanumeric - characters except the dot used for Python package names are - collapsed and replaced with a single underscore, e.g. ' -;#' - becomes '_'. Leading and trailing underscores are removed. - - Note that encoding names should be ASCII only; if they do use - non-ASCII characters, these must be Latin-1 compatible. - - """ - # Make sure we have an 8-bit string, because .translate() works - # differently for Unicode strings. 
- if hasattr(__builtin__, "unicode") and isinstance(encoding, unicode): - # Note that .encode('latin-1') does *not* use the codec - # registry, so this call doesn't recurse. (See unicodeobject.c - # PyUnicode_AsEncodedString() for details) - encoding = encoding.encode('latin-1') - return '_'.join(encoding.translate(_norm_encoding_map).split()) - -def search_function(encoding): - - # Cache lookup - entry = _cache.get(encoding, _unknown) - if entry is not _unknown: - return entry - - # Import the module: - # - # First try to find an alias for the normalized encoding - # name and lookup the module using the aliased name, then try to - # lookup the module using the standard import scheme, i.e. first - # try in the encodings package, then at top-level. - # - norm_encoding = normalize_encoding(encoding) - aliased_encoding = _aliases.get(norm_encoding) or \ - _aliases.get(norm_encoding.replace('.', '_')) - if aliased_encoding is not None: - modnames = [aliased_encoding, - norm_encoding] - else: - modnames = [norm_encoding] - for modname in modnames: - if not modname or '.' in modname: - continue - try: - # Import is absolute to prevent the possibly malicious import of a - # module with side-effects that is not in the 'encodings' package. - mod = __import__('encodings.' 
+ modname, fromlist=_import_tail, - level=0) - except ImportError: - pass - else: - break - else: - mod = None - - try: - getregentry = mod.getregentry - except AttributeError: - # Not a codec module - mod = None - - if mod is None: - # Cache misses - _cache[encoding] = None - return None - - # Now ask the module for the registry entry - entry = getregentry() - if not isinstance(entry, codecs.CodecInfo): - if not 4 <= len(entry) <= 7: - raise CodecRegistryError,\ - 'module "%s" (%s) failed to register' % \ - (mod.__name__, mod.__file__) - if not hasattr(entry[0], '__call__') or \ - not hasattr(entry[1], '__call__') or \ - (entry[2] is not None and not hasattr(entry[2], '__call__')) or \ - (entry[3] is not None and not hasattr(entry[3], '__call__')) or \ - (len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \ - (len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')): - raise CodecRegistryError,\ - 'incompatible codecs in module "%s" (%s)' % \ - (mod.__name__, mod.__file__) - if len(entry)<7 or entry[6] is None: - entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],) - entry = codecs.CodecInfo(*entry) - - # Cache the codec registry entry - _cache[encoding] = entry - - # Register its aliases (without overwriting previously registered - # aliases) - try: - codecaliases = mod.getaliases() - except AttributeError: - pass - else: - for alias in codecaliases: - if alias not in _aliases: - _aliases[alias] = modname - - # Return the registry entry - return entry - -# Register the search_function in the Python codec registry -codecs.register(search_function) diff --git a/python/Lib/encodings/aliases.py b/python/Lib/encodings/aliases.py deleted file mode 100755 index a54cf774b7..0000000000 --- a/python/Lib/encodings/aliases.py +++ /dev/null @@ -1,527 +0,0 @@ -""" Encoding Aliases Support - - This module is used by the encodings package search function to - map encodings names to module names. 
- - Note that the search function normalizes the encoding names before - doing the lookup, so the mapping will have to map normalized - encoding names to module names. - - Contents: - - The following aliases dictionary contains mappings of all IANA - character set names for which the Python core library provides - codecs. In addition to these, a few Python specific codec - aliases have also been added. - -""" -aliases = { - - # Please keep this list sorted alphabetically by value ! - - # ascii codec - '646' : 'ascii', - 'ansi_x3.4_1968' : 'ascii', - 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name - 'ansi_x3.4_1986' : 'ascii', - 'cp367' : 'ascii', - 'csascii' : 'ascii', - 'ibm367' : 'ascii', - 'iso646_us' : 'ascii', - 'iso_646.irv_1991' : 'ascii', - 'iso_ir_6' : 'ascii', - 'us' : 'ascii', - 'us_ascii' : 'ascii', - - # base64_codec codec - 'base64' : 'base64_codec', - 'base_64' : 'base64_codec', - - # big5 codec - 'big5_tw' : 'big5', - 'csbig5' : 'big5', - - # big5hkscs codec - 'big5_hkscs' : 'big5hkscs', - 'hkscs' : 'big5hkscs', - - # bz2_codec codec - 'bz2' : 'bz2_codec', - - # cp037 codec - '037' : 'cp037', - 'csibm037' : 'cp037', - 'ebcdic_cp_ca' : 'cp037', - 'ebcdic_cp_nl' : 'cp037', - 'ebcdic_cp_us' : 'cp037', - 'ebcdic_cp_wt' : 'cp037', - 'ibm037' : 'cp037', - 'ibm039' : 'cp037', - - # cp1026 codec - '1026' : 'cp1026', - 'csibm1026' : 'cp1026', - 'ibm1026' : 'cp1026', - - # cp1140 codec - '1140' : 'cp1140', - 'ibm1140' : 'cp1140', - - # cp1250 codec - '1250' : 'cp1250', - 'windows_1250' : 'cp1250', - - # cp1251 codec - '1251' : 'cp1251', - 'windows_1251' : 'cp1251', - - # cp1252 codec - '1252' : 'cp1252', - 'windows_1252' : 'cp1252', - - # cp1253 codec - '1253' : 'cp1253', - 'windows_1253' : 'cp1253', - - # cp1254 codec - '1254' : 'cp1254', - 'windows_1254' : 'cp1254', - - # cp1255 codec - '1255' : 'cp1255', - 'windows_1255' : 'cp1255', - - # cp1256 codec - '1256' : 'cp1256', - 'windows_1256' : 'cp1256', - - # cp1257 codec - '1257' 
: 'cp1257', - 'windows_1257' : 'cp1257', - - # cp1258 codec - '1258' : 'cp1258', - 'windows_1258' : 'cp1258', - - # cp424 codec - '424' : 'cp424', - 'csibm424' : 'cp424', - 'ebcdic_cp_he' : 'cp424', - 'ibm424' : 'cp424', - - # cp437 codec - '437' : 'cp437', - 'cspc8codepage437' : 'cp437', - 'ibm437' : 'cp437', - - # cp500 codec - '500' : 'cp500', - 'csibm500' : 'cp500', - 'ebcdic_cp_be' : 'cp500', - 'ebcdic_cp_ch' : 'cp500', - 'ibm500' : 'cp500', - - # cp775 codec - '775' : 'cp775', - 'cspc775baltic' : 'cp775', - 'ibm775' : 'cp775', - - # cp850 codec - '850' : 'cp850', - 'cspc850multilingual' : 'cp850', - 'ibm850' : 'cp850', - - # cp852 codec - '852' : 'cp852', - 'cspcp852' : 'cp852', - 'ibm852' : 'cp852', - - # cp855 codec - '855' : 'cp855', - 'csibm855' : 'cp855', - 'ibm855' : 'cp855', - - # cp857 codec - '857' : 'cp857', - 'csibm857' : 'cp857', - 'ibm857' : 'cp857', - - # cp858 codec - '858' : 'cp858', - 'csibm858' : 'cp858', - 'ibm858' : 'cp858', - - # cp860 codec - '860' : 'cp860', - 'csibm860' : 'cp860', - 'ibm860' : 'cp860', - - # cp861 codec - '861' : 'cp861', - 'cp_is' : 'cp861', - 'csibm861' : 'cp861', - 'ibm861' : 'cp861', - - # cp862 codec - '862' : 'cp862', - 'cspc862latinhebrew' : 'cp862', - 'ibm862' : 'cp862', - - # cp863 codec - '863' : 'cp863', - 'csibm863' : 'cp863', - 'ibm863' : 'cp863', - - # cp864 codec - '864' : 'cp864', - 'csibm864' : 'cp864', - 'ibm864' : 'cp864', - - # cp865 codec - '865' : 'cp865', - 'csibm865' : 'cp865', - 'ibm865' : 'cp865', - - # cp866 codec - '866' : 'cp866', - 'csibm866' : 'cp866', - 'ibm866' : 'cp866', - - # cp869 codec - '869' : 'cp869', - 'cp_gr' : 'cp869', - 'csibm869' : 'cp869', - 'ibm869' : 'cp869', - - # cp932 codec - '932' : 'cp932', - 'ms932' : 'cp932', - 'mskanji' : 'cp932', - 'ms_kanji' : 'cp932', - - # cp949 codec - '949' : 'cp949', - 'ms949' : 'cp949', - 'uhc' : 'cp949', - - # cp950 codec - '950' : 'cp950', - 'ms950' : 'cp950', - - # euc_jis_2004 codec - 'jisx0213' : 'euc_jis_2004', - 'eucjis2004' : 
'euc_jis_2004', - 'euc_jis2004' : 'euc_jis_2004', - - # euc_jisx0213 codec - 'eucjisx0213' : 'euc_jisx0213', - - # euc_jp codec - 'eucjp' : 'euc_jp', - 'ujis' : 'euc_jp', - 'u_jis' : 'euc_jp', - - # euc_kr codec - 'euckr' : 'euc_kr', - 'korean' : 'euc_kr', - 'ksc5601' : 'euc_kr', - 'ks_c_5601' : 'euc_kr', - 'ks_c_5601_1987' : 'euc_kr', - 'ksx1001' : 'euc_kr', - 'ks_x_1001' : 'euc_kr', - - # gb18030 codec - 'gb18030_2000' : 'gb18030', - - # gb2312 codec - 'chinese' : 'gb2312', - 'csiso58gb231280' : 'gb2312', - 'euc_cn' : 'gb2312', - 'euccn' : 'gb2312', - 'eucgb2312_cn' : 'gb2312', - 'gb2312_1980' : 'gb2312', - 'gb2312_80' : 'gb2312', - 'iso_ir_58' : 'gb2312', - - # gbk codec - '936' : 'gbk', - 'cp936' : 'gbk', - 'ms936' : 'gbk', - - # hex_codec codec - 'hex' : 'hex_codec', - - # hp_roman8 codec - 'roman8' : 'hp_roman8', - 'r8' : 'hp_roman8', - 'csHPRoman8' : 'hp_roman8', - - # hz codec - 'hzgb' : 'hz', - 'hz_gb' : 'hz', - 'hz_gb_2312' : 'hz', - - # iso2022_jp codec - 'csiso2022jp' : 'iso2022_jp', - 'iso2022jp' : 'iso2022_jp', - 'iso_2022_jp' : 'iso2022_jp', - - # iso2022_jp_1 codec - 'iso2022jp_1' : 'iso2022_jp_1', - 'iso_2022_jp_1' : 'iso2022_jp_1', - - # iso2022_jp_2 codec - 'iso2022jp_2' : 'iso2022_jp_2', - 'iso_2022_jp_2' : 'iso2022_jp_2', - - # iso2022_jp_2004 codec - 'iso_2022_jp_2004' : 'iso2022_jp_2004', - 'iso2022jp_2004' : 'iso2022_jp_2004', - - # iso2022_jp_3 codec - 'iso2022jp_3' : 'iso2022_jp_3', - 'iso_2022_jp_3' : 'iso2022_jp_3', - - # iso2022_jp_ext codec - 'iso2022jp_ext' : 'iso2022_jp_ext', - 'iso_2022_jp_ext' : 'iso2022_jp_ext', - - # iso2022_kr codec - 'csiso2022kr' : 'iso2022_kr', - 'iso2022kr' : 'iso2022_kr', - 'iso_2022_kr' : 'iso2022_kr', - - # iso8859_10 codec - 'csisolatin6' : 'iso8859_10', - 'iso_8859_10' : 'iso8859_10', - 'iso_8859_10_1992' : 'iso8859_10', - 'iso_ir_157' : 'iso8859_10', - 'l6' : 'iso8859_10', - 'latin6' : 'iso8859_10', - - # iso8859_11 codec - 'thai' : 'iso8859_11', - 'iso_8859_11' : 'iso8859_11', - 'iso_8859_11_2001' : 
'iso8859_11', - - # iso8859_13 codec - 'iso_8859_13' : 'iso8859_13', - 'l7' : 'iso8859_13', - 'latin7' : 'iso8859_13', - - # iso8859_14 codec - 'iso_8859_14' : 'iso8859_14', - 'iso_8859_14_1998' : 'iso8859_14', - 'iso_celtic' : 'iso8859_14', - 'iso_ir_199' : 'iso8859_14', - 'l8' : 'iso8859_14', - 'latin8' : 'iso8859_14', - - # iso8859_15 codec - 'iso_8859_15' : 'iso8859_15', - 'l9' : 'iso8859_15', - 'latin9' : 'iso8859_15', - - # iso8859_16 codec - 'iso_8859_16' : 'iso8859_16', - 'iso_8859_16_2001' : 'iso8859_16', - 'iso_ir_226' : 'iso8859_16', - 'l10' : 'iso8859_16', - 'latin10' : 'iso8859_16', - - # iso8859_2 codec - 'csisolatin2' : 'iso8859_2', - 'iso_8859_2' : 'iso8859_2', - 'iso_8859_2_1987' : 'iso8859_2', - 'iso_ir_101' : 'iso8859_2', - 'l2' : 'iso8859_2', - 'latin2' : 'iso8859_2', - - # iso8859_3 codec - 'csisolatin3' : 'iso8859_3', - 'iso_8859_3' : 'iso8859_3', - 'iso_8859_3_1988' : 'iso8859_3', - 'iso_ir_109' : 'iso8859_3', - 'l3' : 'iso8859_3', - 'latin3' : 'iso8859_3', - - # iso8859_4 codec - 'csisolatin4' : 'iso8859_4', - 'iso_8859_4' : 'iso8859_4', - 'iso_8859_4_1988' : 'iso8859_4', - 'iso_ir_110' : 'iso8859_4', - 'l4' : 'iso8859_4', - 'latin4' : 'iso8859_4', - - # iso8859_5 codec - 'csisolatincyrillic' : 'iso8859_5', - 'cyrillic' : 'iso8859_5', - 'iso_8859_5' : 'iso8859_5', - 'iso_8859_5_1988' : 'iso8859_5', - 'iso_ir_144' : 'iso8859_5', - - # iso8859_6 codec - 'arabic' : 'iso8859_6', - 'asmo_708' : 'iso8859_6', - 'csisolatinarabic' : 'iso8859_6', - 'ecma_114' : 'iso8859_6', - 'iso_8859_6' : 'iso8859_6', - 'iso_8859_6_1987' : 'iso8859_6', - 'iso_ir_127' : 'iso8859_6', - - # iso8859_7 codec - 'csisolatingreek' : 'iso8859_7', - 'ecma_118' : 'iso8859_7', - 'elot_928' : 'iso8859_7', - 'greek' : 'iso8859_7', - 'greek8' : 'iso8859_7', - 'iso_8859_7' : 'iso8859_7', - 'iso_8859_7_1987' : 'iso8859_7', - 'iso_ir_126' : 'iso8859_7', - - # iso8859_8 codec - 'csisolatinhebrew' : 'iso8859_8', - 'hebrew' : 'iso8859_8', - 'iso_8859_8' : 'iso8859_8', - 
'iso_8859_8_1988' : 'iso8859_8', - 'iso_ir_138' : 'iso8859_8', - - # iso8859_9 codec - 'csisolatin5' : 'iso8859_9', - 'iso_8859_9' : 'iso8859_9', - 'iso_8859_9_1989' : 'iso8859_9', - 'iso_ir_148' : 'iso8859_9', - 'l5' : 'iso8859_9', - 'latin5' : 'iso8859_9', - - # johab codec - 'cp1361' : 'johab', - 'ms1361' : 'johab', - - # koi8_r codec - 'cskoi8r' : 'koi8_r', - - # latin_1 codec - # - # Note that the latin_1 codec is implemented internally in C and a - # lot faster than the charmap codec iso8859_1 which uses the same - # encoding. This is why we discourage the use of the iso8859_1 - # codec and alias it to latin_1 instead. - # - '8859' : 'latin_1', - 'cp819' : 'latin_1', - 'csisolatin1' : 'latin_1', - 'ibm819' : 'latin_1', - 'iso8859' : 'latin_1', - 'iso8859_1' : 'latin_1', - 'iso_8859_1' : 'latin_1', - 'iso_8859_1_1987' : 'latin_1', - 'iso_ir_100' : 'latin_1', - 'l1' : 'latin_1', - 'latin' : 'latin_1', - 'latin1' : 'latin_1', - - # mac_cyrillic codec - 'maccyrillic' : 'mac_cyrillic', - - # mac_greek codec - 'macgreek' : 'mac_greek', - - # mac_iceland codec - 'maciceland' : 'mac_iceland', - - # mac_latin2 codec - 'maccentraleurope' : 'mac_latin2', - 'maclatin2' : 'mac_latin2', - - # mac_roman codec - 'macroman' : 'mac_roman', - - # mac_turkish codec - 'macturkish' : 'mac_turkish', - - # mbcs codec - 'dbcs' : 'mbcs', - - # ptcp154 codec - 'csptcp154' : 'ptcp154', - 'pt154' : 'ptcp154', - 'cp154' : 'ptcp154', - 'cyrillic_asian' : 'ptcp154', - - # quopri_codec codec - 'quopri' : 'quopri_codec', - 'quoted_printable' : 'quopri_codec', - 'quotedprintable' : 'quopri_codec', - - # rot_13 codec - 'rot13' : 'rot_13', - - # shift_jis codec - 'csshiftjis' : 'shift_jis', - 'shiftjis' : 'shift_jis', - 'sjis' : 'shift_jis', - 's_jis' : 'shift_jis', - - # shift_jis_2004 codec - 'shiftjis2004' : 'shift_jis_2004', - 'sjis_2004' : 'shift_jis_2004', - 's_jis_2004' : 'shift_jis_2004', - - # shift_jisx0213 codec - 'shiftjisx0213' : 'shift_jisx0213', - 'sjisx0213' : 'shift_jisx0213', - 
's_jisx0213' : 'shift_jisx0213', - - # tactis codec - 'tis260' : 'tactis', - - # tis_620 codec - 'tis620' : 'tis_620', - 'tis_620_0' : 'tis_620', - 'tis_620_2529_0' : 'tis_620', - 'tis_620_2529_1' : 'tis_620', - 'iso_ir_166' : 'tis_620', - - # utf_16 codec - 'u16' : 'utf_16', - 'utf16' : 'utf_16', - - # utf_16_be codec - 'unicodebigunmarked' : 'utf_16_be', - 'utf_16be' : 'utf_16_be', - - # utf_16_le codec - 'unicodelittleunmarked' : 'utf_16_le', - 'utf_16le' : 'utf_16_le', - - # utf_32 codec - 'u32' : 'utf_32', - 'utf32' : 'utf_32', - - # utf_32_be codec - 'utf_32be' : 'utf_32_be', - - # utf_32_le codec - 'utf_32le' : 'utf_32_le', - - # utf_7 codec - 'u7' : 'utf_7', - 'utf7' : 'utf_7', - 'unicode_1_1_utf_7' : 'utf_7', - - # utf_8 codec - 'u8' : 'utf_8', - 'utf' : 'utf_8', - 'utf8' : 'utf_8', - 'utf8_ucs2' : 'utf_8', - 'utf8_ucs4' : 'utf_8', - - # uu_codec codec - 'uu' : 'uu_codec', - - # zlib_codec codec - 'zip' : 'zlib_codec', - 'zlib' : 'zlib_codec', - -} diff --git a/python/Lib/encodings/ascii.py b/python/Lib/encodings/ascii.py deleted file mode 100755 index 2033cde974..0000000000 --- a/python/Lib/encodings/ascii.py +++ /dev/null @@ -1,50 +0,0 @@ -""" Python 'ascii' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. 
- encode = codecs.ascii_encode - decode = codecs.ascii_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.ascii_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.ascii_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -class StreamConverter(StreamWriter,StreamReader): - - encode = codecs.ascii_decode - decode = codecs.ascii_encode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='ascii', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/base64_codec.py b/python/Lib/encodings/base64_codec.py deleted file mode 100755 index 34ac555428..0000000000 --- a/python/Lib/encodings/base64_codec.py +++ /dev/null @@ -1,80 +0,0 @@ -""" Python 'base64_codec' Codec - base64 content transfer encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). - -""" -import codecs, base64 - -### Codec APIs - -def base64_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = base64.encodestring(input) - return (output, len(input)) - -def base64_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. 
Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = base64.decodestring(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input,errors='strict'): - return base64_encode(input,errors) - def decode(self, input,errors='strict'): - return base64_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - assert self.errors == 'strict' - return base64.encodestring(input) - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - assert self.errors == 'strict' - return base64.decodestring(input) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='base64', - encode=base64_encode, - decode=base64_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - ) diff --git a/python/Lib/encodings/big5.py b/python/Lib/encodings/big5.py deleted file mode 100755 index 7adeb0e160..0000000000 --- a/python/Lib/encodings/big5.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# big5.py: Python Unicode Codec for BIG5 -# -# Written by Hye-Shik Chang -# - -import _codecs_tw, codecs -import _multibytecodec as mbc - -codec = _codecs_tw.getcodec('big5') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class 
StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='big5', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/big5hkscs.py b/python/Lib/encodings/big5hkscs.py deleted file mode 100755 index 350df37baa..0000000000 --- a/python/Lib/encodings/big5hkscs.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# big5hkscs.py: Python Unicode Codec for BIG5HKSCS -# -# Written by Hye-Shik Chang -# - -import _codecs_hk, codecs -import _multibytecodec as mbc - -codec = _codecs_hk.getcodec('big5hkscs') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='big5hkscs', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/bz2_codec.py b/python/Lib/encodings/bz2_codec.py deleted file mode 100755 index 136503ac1e..0000000000 --- a/python/Lib/encodings/bz2_codec.py +++ /dev/null @@ -1,103 +0,0 @@ -""" Python 'bz2_codec' Codec - bz2 compression encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. 
- - Adapted by Raymond Hettinger from zlib_codec.py which was written - by Marc-Andre Lemburg (mal@lemburg.com). - -""" -import codecs -import bz2 # this codec needs the optional bz2 module ! - -### Codec APIs - -def bz2_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = bz2.compress(input) - return (output, len(input)) - -def bz2_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. 
- - """ - assert errors == 'strict' - output = bz2.decompress(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input, errors='strict'): - return bz2_encode(input, errors) - def decode(self, input, errors='strict'): - return bz2_decode(input, errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.compressobj = bz2.BZ2Compressor() - - def encode(self, input, final=False): - if final: - c = self.compressobj.compress(input) - return c + self.compressobj.flush() - else: - return self.compressobj.compress(input) - - def reset(self): - self.compressobj = bz2.BZ2Compressor() - -class IncrementalDecoder(codecs.IncrementalDecoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.decompressobj = bz2.BZ2Decompressor() - - def decode(self, input, final=False): - try: - return self.decompressobj.decompress(input) - except EOFError: - return '' - - def reset(self): - self.decompressobj = bz2.BZ2Decompressor() - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name="bz2", - encode=bz2_encode, - decode=bz2_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - ) diff --git a/python/Lib/encodings/charmap.py b/python/Lib/encodings/charmap.py deleted file mode 100755 index 81189b161a..0000000000 --- a/python/Lib/encodings/charmap.py +++ /dev/null @@ -1,69 +0,0 @@ -""" Generic Python Character Mapping Codec. - - Use this codec directly rather than through the automatic - conversion mechanisms supplied by unicode() and .encode(). - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. 
NO WARRANTY. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.charmap_encode - decode = codecs.charmap_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict', mapping=None): - codecs.IncrementalEncoder.__init__(self, errors) - self.mapping = mapping - - def encode(self, input, final=False): - return codecs.charmap_encode(input, self.errors, self.mapping)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def __init__(self, errors='strict', mapping=None): - codecs.IncrementalDecoder.__init__(self, errors) - self.mapping = mapping - - def decode(self, input, final=False): - return codecs.charmap_decode(input, self.errors, self.mapping)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - - def __init__(self,stream,errors='strict',mapping=None): - codecs.StreamWriter.__init__(self,stream,errors) - self.mapping = mapping - - def encode(self,input,errors='strict'): - return Codec.encode(input,errors,self.mapping) - -class StreamReader(Codec,codecs.StreamReader): - - def __init__(self,stream,errors='strict',mapping=None): - codecs.StreamReader.__init__(self,stream,errors) - self.mapping = mapping - - def decode(self,input,errors='strict'): - return Codec.decode(input,errors,self.mapping) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='charmap', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/cp037.py b/python/Lib/encodings/cp037.py deleted file mode 100755 index c802b899af..0000000000 --- a/python/Lib/encodings/cp037.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp037 generated from 
'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp037', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> 
CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4A -> CENT SIGN - u'.' 
# 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' # 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - 
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH 
TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### 
Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1006.py b/python/Lib/encodings/cp1006.py deleted file mode 100755 index e21e804eb9..0000000000 --- a/python/Lib/encodings/cp1006.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1006 generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1006', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 
0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO - u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE - u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO - u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE - u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR - u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE - u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX - u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN - u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT - u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE - u'\u060c' # 0xAB -> ARABIC COMMA - u'\u061b' # 0xAC -> ARABIC SEMICOLON - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u061f' # 0xAE -> ARABIC QUESTION MARK - u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM - u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM - 
u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM - u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM - u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM - u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM - u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM - u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM - u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM - u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM - u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM - u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM - u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM - u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM - u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM - u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM - u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM - u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM - u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM - u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM - u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM - u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM - u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN - u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM - u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM - u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM - u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM - u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM - u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM - u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM - u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM - u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM - u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM - u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM - u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM - u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM - u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM - u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH 
ISOLATED FORM - u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM - u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM - u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM - u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM - u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM - u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM - u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM - u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM - u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM - u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM - u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM - u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM - u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM - u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM - u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM - u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM - u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM - u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM - u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM - u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM - u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM - u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM - u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM - u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM - u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM - u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM - u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM - u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM - u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM - u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM - 
u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM - u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM - u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM - u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM - u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1026.py b/python/Lib/encodings/cp1026.py deleted file mode 100755 index 45bbe626fd..0000000000 --- a/python/Lib/encodings/cp1026.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1026 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1026', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - 
u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL 
LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'{' # 0x48 -> LEFT CURLY BRACKET - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'[' # 0x68 -> LEFT SQUARE BRACKET - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I - u':' # 0x7A -> COLON - u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'}' # 0x8C -> RIGHT CURLY BRACKET - u'`' # 0x8D -> GRAVE ACCENT - u'\xa6' # 0x8E -> BROKEN BAR - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE 
AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u']' # 0xAC -> RIGHT SQUARE BRACKET - u'$' # 0xAD -> DOLLAR SIGN - u'@' # 0xAE -> COMMERCIAL AT - u'\xae' # 0xAF -> REGISTERED SIGN - u'\xa2' # 0xB0 -> CENT SIGN - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xBA -> NOT SIGN - u'|' # 0xBB -> VERTICAL LINE - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'~' # 0xCC -> TILDE - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'\u011f' # 0xD0 -> LATIN SMALL 
LETTER G WITH BREVE - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\\' # 0xDC -> REVERSE SOLIDUS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'#' # 0xEC -> NUMBER SIGN - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'"' # 0xFC -> QUOTATION MARK - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git 
a/python/Lib/encodings/cp1140.py b/python/Lib/encodings/cp1140.py deleted file mode 100755 index 7e507fd853..0000000000 --- a/python/Lib/encodings/cp1140.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1140 generated from 'python-mappings/CP1140.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1140', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL 
THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4A -> CENT SIGN - u'.' 
# 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' # 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - 
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\u20ac' # 0x9F -> EURO SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH 
TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### 
Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1250.py b/python/Lib/encodings/cp1250.py deleted file mode 100755 index d620b89335..0000000000 --- a/python/Lib/encodings/cp1250.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1250 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1250', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - 
u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\ufffe' # 0x83 -> UNDEFINED - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON - u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON - u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE - u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON - u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON - u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u02c7' # 0xA1 -> CARON - u'\u02d8' # 0xA2 -> BREVE - u'\u0141' # 0xA3 -> LATIN 
CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u02db' # 0xB2 -> OGONEK - u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON - u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT - u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH 
ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - 
u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1251.py b/python/Lib/encodings/cp1251.py deleted file mode 100755 index 216771fa4c..0000000000 --- a/python/Lib/encodings/cp1251.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1251 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1251', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE - u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u20ac' # 0x88 -> EURO SIGN - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE - u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE - u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE - u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE - u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE - u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE - u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE - u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 
0xA2 -> CYRILLIC SMALL LETTER SHORT U - u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO - u'\u2116' # 0xB9 -> NUMERO SIGN - u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE - u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE - u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE - u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI - u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM - 
u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 
0xF6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1252.py b/python/Lib/encodings/cp1252.py deleted file mode 100755 index e60a328db4..0000000000 --- a/python/Lib/encodings/cp1252.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1252 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1252', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - 
u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' 
# 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xD1 -> 
LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH 
DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1253.py b/python/Lib/encodings/cp1253.py deleted file mode 100755 index 49f6cccbd2..0000000000 --- a/python/Lib/encodings/cp1253.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1253 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1253', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 
0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS 
- u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\ufffe' # 0xAA -> UNDEFINED - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u2015' # 0xAF -> HORIZONTAL BAR - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u0384' # 0xB4 -> GREEK TONOS - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO - u'\ufffe' # 0xD2 -> UNDEFINED - u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xD4 -> 
GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU - u'\u03bd' # 0xED -> GREEK SMALL LETTER NU - u'\u03be' # 0xEE -> GREEK SMALL LETTER XI - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI - u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 
0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\ufffe' # 0xFF -> UNDEFINED -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1254.py b/python/Lib/encodings/cp1254.py deleted file mode 100755 index 65530ab546..0000000000 --- a/python/Lib/encodings/cp1254.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1254 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1254', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE 
FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> 
YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH 
TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH 
DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1255.py b/python/Lib/encodings/cp1255.py deleted file mode 100755 index fd1456fab6..0000000000 --- a/python/Lib/encodings/cp1255.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1255 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1255', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - 
u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> 
DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xd7' # 0xAA -> MULTIPLICATION SIGN - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xf7' # 0xBA -> DIVISION SIGN - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA - u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL - u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH - u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS - u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ - u'\u05b5' # 0xC5 -> HEBREW POINT TSERE - u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL - u'\u05b7' # 0xC7 -> HEBREW POINT PATAH - u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS - u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM - u'\ufffe' # 0xCA -> UNDEFINED - u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS - u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ - u'\u05bd' # 0xCD -> HEBREW POINT METEG - u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF - u'\u05bf' # 0xCF -> HEBREW POINT RAFE - u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ - u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT - u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT - u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ - u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV - u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD - u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD - u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH - u'\u05f4' # 0xD8 -> 
HEBREW PUNCTUATION GERSHAYIM - u'\ufffe' # 0xD9 -> UNDEFINED - u'\ufffe' # 0xDA -> UNDEFINED - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\ufffe' # 0xDF -> UNDEFINED - u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xE1 -> HEBREW LETTER BET - u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xE3 -> HEBREW LETTER DALET - u'\u05d4' # 0xE4 -> HEBREW LETTER HE - u'\u05d5' # 0xE5 -> HEBREW LETTER VAV - u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xE7 -> HEBREW LETTER HET - u'\u05d8' # 0xE8 -> HEBREW LETTER TET - u'\u05d9' # 0xE9 -> HEBREW LETTER YOD - u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xEB -> HEBREW LETTER KAF - u'\u05dc' # 0xEC -> HEBREW LETTER LAMED - u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xEE -> HEBREW LETTER MEM - u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xF0 -> HEBREW LETTER NUN - u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xF4 -> HEBREW LETTER PE - u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xF7 -> HEBREW LETTER QOF - u'\u05e8' # 0xF8 -> HEBREW LETTER RESH - u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xFA -> HEBREW LETTER TAV - u'\ufffe' # 0xFB -> UNDEFINED - u'\ufffe' # 0xFC -> UNDEFINED - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\ufffe' # 0xFF -> UNDEFINED -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1256.py b/python/Lib/encodings/cp1256.py deleted file mode 100755 index 302b5fa066..0000000000 --- a/python/Lib/encodings/cp1256.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1256 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1256', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\u067e' # 0x81 -> ARABIC LETTER PEH - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0679' # 0x8A -> ARABIC LETTER TTEH - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\u0686' # 0x8D -> ARABIC LETTER TCHEH - u'\u0698' # 0x8E -> ARABIC LETTER JEH - u'\u0688' # 0x8F -> ARABIC LETTER DDAL - u'\u06af' # 0x90 -> ARABIC LETTER GAF - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0691' # 0x9A -> ARABIC LETTER RREH - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER - u'\u200d' # 0x9E -> ZERO WIDTH JOINER - u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u060c' # 0xA1 -> ARABIC COMMA - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - 
u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u061b' # 0xBA -> ARABIC SEMICOLON - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC 
LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0637' # 0xD8 -> ARABIC LETTER TAH - u'\u0638' # 0xD9 -> ARABIC LETTER ZAH - u'\u0639' # 0xDA -> ARABIC LETTER AIN - u'\u063a' # 0xDB -> ARABIC LETTER GHAIN - u'\u0640' # 0xDC -> ARABIC TATWEEL - u'\u0641' # 0xDD -> ARABIC LETTER FEH - u'\u0642' # 0xDE -> ARABIC LETTER QAF - u'\u0643' # 0xDF -> ARABIC LETTER KAF - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\u0644' # 0xE1 -> ARABIC LETTER LAM - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0645' # 0xE3 -> ARABIC LETTER MEEM - u'\u0646' # 0xE4 -> ARABIC LETTER NOON - u'\u0647' # 0xE5 -> ARABIC LETTER HEH - u'\u0648' # 0xE6 -> ARABIC LETTER WAW - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xED -> ARABIC LETTER YEH - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u064b' # 0xF0 -> ARABIC FATHATAN - u'\u064c' # 0xF1 -> ARABIC DAMMATAN - u'\u064d' # 0xF2 -> ARABIC KASRATAN - u'\u064e' # 0xF3 -> ARABIC FATHA - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u064f' # 0xF5 -> ARABIC DAMMA - u'\u0650' # 0xF6 -> ARABIC KASRA - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0651' # 0xF8 -> ARABIC SHADDA - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0652' # 0xFA -> ARABIC SUKUN - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1257.py 
b/python/Lib/encodings/cp1257.py deleted file mode 100755 index 53a6b29d5b..0000000000 --- a/python/Lib/encodings/cp1257.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1257 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1257', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL 
THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\ufffe' # 0x83 -> UNDEFINED - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\xa8' # 0x8D -> DIAERESIS - u'\u02c7' # 0x8E -> CARON - u'\xb8' # 0x8F -> CEDILLA - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\xaf' # 0x9D -> MACRON - u'\u02db' # 0x9E -> OGONEK - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' # 0xA1 -> UNDEFINED - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\ufffe' # 0xA5 -> UNDEFINED - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0156' # 0xAA 
-> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xBF -> LATIN SMALL LETTER AE - u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA - 
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION 
SIGN - u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp1258.py b/python/Lib/encodings/cp1258.py deleted file mode 100755 index 4b25d8e7e8..0000000000 --- a/python/Lib/encodings/cp1258.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1258 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1258', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> 
END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - 
u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE - 
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN - u'\u0303' # 0xDE -> COMBINING TILDE - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\u0323' # 0xF2 -> COMBINING DOT BELOW - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' 
# 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN - u'\u20ab' # 0xFE -> DONG SIGN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp424.py b/python/Lib/encodings/cp424.py deleted file mode 100755 index d3ade22776..0000000000 --- a/python/Lib/encodings/cp424.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp424 generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp424', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> SELECT - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 
0x06 -> REQUIRED NEW LINE - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> GRAPHIC ESCAPE - u'\x8d' # 0x09 -> SUPERSCRIPT - u'\x8e' # 0x0A -> REPEAT - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION - u'\x85' # 0x15 -> NEW LINE - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> UNIT BACK SPACE - u'\x8f' # 0x1B -> CUSTOMER USE ONE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> DIGIT SELECT - u'\x81' # 0x21 -> START OF SIGNIFICANCE - u'\x82' # 0x22 -> FIELD SEPARATOR - u'\x83' # 0x23 -> WORD UNDERSCORE - u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> SET ATTRIBUTE - u'\x89' # 0x29 -> START FIELD EXTENDED - u'\x8a' # 0x2A -> SET MODE OR SWITCH - u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX - u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> - u'\x91' # 0x31 -> - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> INDEX RETURN - u'\x94' # 0x34 -> PRESENTATION POSITION - u'\x95' # 0x35 -> TRANSPARENT - u'\x96' # 0x36 -> NUMERIC BACKSPACE - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> SUBSCRIPT - u'\x99' # 0x39 -> INDENT TABULATION - u'\x9a' # 0x3A -> REVERSE FORM FEED - u'\x9b' # 0x3B -> CUSTOMER USE THREE - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> - 
u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\u05d0' # 0x41 -> HEBREW LETTER ALEF - u'\u05d1' # 0x42 -> HEBREW LETTER BET - u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x44 -> HEBREW LETTER DALET - u'\u05d4' # 0x45 -> HEBREW LETTER HE - u'\u05d5' # 0x46 -> HEBREW LETTER VAV - u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x48 -> HEBREW LETTER HET - u'\u05d8' # 0x49 -> HEBREW LETTER TET - u'\xa2' # 0x4A -> CENT SIGN - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\u05d9' # 0x51 -> HEBREW LETTER YOD - u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x53 -> HEBREW LETTER KAF - u'\u05dc' # 0x54 -> HEBREW LETTER LAMED - u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x56 -> HEBREW LETTER MEM - u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x58 -> HEBREW LETTER NUN - u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH - u'!' # 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\u05e2' # 0x62 -> HEBREW LETTER AYIN - u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x64 -> HEBREW LETTER PE - u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x66 -> HEBREW LETTER TSADI - u'\u05e7' # 0x67 -> HEBREW LETTER QOF - u'\u05e8' # 0x68 -> HEBREW LETTER RESH - u'\u05e9' # 0x69 -> HEBREW LETTER SHIN - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\ufffe' # 0x70 -> UNDEFINED - u'\u05ea' # 0x71 -> HEBREW LETTER TAV - u'\ufffe' # 0x72 -> UNDEFINED - u'\ufffe' # 0x73 -> UNDEFINED - u'\xa0' # 0x74 -> NO-BREAK SPACE - u'\ufffe' # 0x75 -> UNDEFINED - u'\ufffe' # 0x76 -> UNDEFINED - u'\ufffe' # 0x77 -> UNDEFINED - u'\u2017' # 0x78 -> DOUBLE LOW LINE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\ufffe' # 0x80 -> UNDEFINED - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\ufffe' # 0x9A -> UNDEFINED - u'\ufffe' # 0x9B -> UNDEFINED - u'\ufffe' # 0x9C -> UNDEFINED - u'\xb8' # 0x9D -> CEDILLA - u'\ufffe' # 0x9E -> UNDEFINED - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL 
LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\ufffe' # 0xAA -> UNDEFINED - u'\ufffe' # 0xAB -> UNDEFINED - u'\ufffe' # 0xAC -> UNDEFINED - u'\ufffe' # 0xAD -> UNDEFINED - u'\ufffe' # 0xAE -> UNDEFINED - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\ufffe' # 0xCB -> UNDEFINED - u'\ufffe' # 0xCC -> UNDEFINED - u'\ufffe' # 0xCD -> UNDEFINED - u'\ufffe' # 0xCE -> UNDEFINED - u'\ufffe' # 0xCF -> UNDEFINED - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> 
UNDEFINED - u'\ufffe' # 0xDF -> UNDEFINED - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\ufffe' # 0xEB -> UNDEFINED - u'\ufffe' # 0xEC -> UNDEFINED - u'\ufffe' # 0xED -> UNDEFINED - u'\ufffe' # 0xEE -> UNDEFINED - u'\ufffe' # 0xEF -> UNDEFINED - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\ufffe' # 0xFB -> UNDEFINED - u'\ufffe' # 0xFC -> UNDEFINED - u'\ufffe' # 0xFD -> UNDEFINED - u'\ufffe' # 0xFE -> UNDEFINED - u'\x9f' # 0xFF -> EIGHT ONES -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp437.py b/python/Lib/encodings/cp437.py deleted file mode 100755 index 52cd882942..0000000000 --- a/python/Lib/encodings/cp437.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec cp437 generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp437', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN 
AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - 
u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xa5' # 0x009d -> YEN SIGN - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT 
DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - 
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # 
DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL 
LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # 
VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a5: 0x009d, # YEN SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER 
I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 
0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND 
HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp500.py b/python/Lib/encodings/cp500.py deleted file mode 100755 index 60766c0393..0000000000 --- a/python/Lib/encodings/cp500.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp500 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp500', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' 
# 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - 
u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'[' # 0x4A -> LEFT SQUARE BRACKET - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u']' # 0x5A -> RIGHT SQUARE BRACKET - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - 
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'\xa2' # 0xB0 -> CENT SIGN - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xBA -> NOT SIGN - u'|' # 0xBB -> VERTICAL LINE - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xD0 
-> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table 
-encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp720.py b/python/Lib/encodings/cp720.py deleted file mode 100644 index 5c96d9813c..0000000000 --- a/python/Lib/encodings/cp720.py +++ /dev/null @@ -1,309 +0,0 @@ -"""Python Character Mapping Codec cp720 generated on Windows: -Vista 6.0.6002 SP2 Multiprocessor Free with the command: - python Tools/unicode/genwincodec.py 720 -"""#" - - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp720', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - 
u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\x80' - u'\x81' - u'\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\x84' - u'\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE - u'\x86' - u'\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\x8d' - u'\x8e' - u'\x8f' - u'\x90' - u'\u0651' # 0x91 -> ARABIC SHADDA - u'\u0652' # 0x92 -> ARABIC SUKUN - u'\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xa4' # 0x94 -> CURRENCY SIGN - u'\u0640' # 0x95 -> ARABIC TATWEEL - u'\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0621' # 0x98 -> ARABIC LETTER HAMZA - u'\u0622' # 0x99 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0x9A -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0x9B -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\xa3' # 0x9C -> POUND SIGN - u'\u0625' # 0x9D -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0x9E -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0x9F -> ARABIC LETTER ALEF - u'\u0628' # 0xA0 -> ARABIC LETTER BEH - u'\u0629' # 0xA1 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xA2 -> ARABIC LETTER TEH - u'\u062b' # 0xA3 -> ARABIC LETTER THEH - u'\u062c' # 0xA4 -> ARABIC LETTER JEEM - u'\u062d' # 0xA5 -> ARABIC LETTER HAH - u'\u062e' # 
0xA6 -> ARABIC LETTER KHAH - u'\u062f' # 0xA7 -> ARABIC LETTER DAL - u'\u0630' # 0xA8 -> ARABIC LETTER THAL - u'\u0631' # 0xA9 -> ARABIC LETTER REH - u'\u0632' # 0xAA -> ARABIC LETTER ZAIN - u'\u0633' # 0xAB -> ARABIC LETTER SEEN - u'\u0634' # 0xAC -> ARABIC LETTER SHEEN - u'\u0635' # 0xAD -> ARABIC LETTER SAD - u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0xB0 -> LIGHT SHADE - u'\u2592' # 0xB1 -> MEDIUM SHADE - u'\u2593' # 0xB2 -> DARK SHADE - u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0xB5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0xB6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0xB8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0xBD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0xBE -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0xC6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xC7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0xCB -> BOX DRAWINGS 
DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0xCF -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xD0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0xD1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0xD2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0xD3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0xD4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0xD5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0xD6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0xD7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0xD8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0xDB -> FULL BLOCK - u'\u2584' # 0xDC -> LOWER HALF BLOCK - u'\u258c' # 0xDD -> LEFT HALF BLOCK - u'\u2590' # 0xDE -> RIGHT HALF BLOCK - u'\u2580' # 0xDF -> UPPER HALF BLOCK - u'\u0636' # 0xE0 -> ARABIC LETTER DAD - u'\u0637' # 0xE1 -> ARABIC LETTER TAH - u'\u0638' # 0xE2 -> ARABIC LETTER ZAH - u'\u0639' # 0xE3 -> ARABIC LETTER AIN - u'\u063a' # 0xE4 -> ARABIC LETTER GHAIN - u'\u0641' # 0xE5 -> ARABIC LETTER FEH - u'\xb5' # 0xE6 -> MICRO SIGN - u'\u0642' # 0xE7 -> ARABIC LETTER QAF - u'\u0643' # 0xE8 -> ARABIC LETTER KAF - u'\u0644' # 0xE9 -> ARABIC LETTER LAM - u'\u0645' # 0xEA -> ARABIC LETTER MEEM - u'\u0646' # 0xEB -> ARABIC LETTER NOON - u'\u0647' # 0xEC -> ARABIC LETTER HEH - u'\u0648' # 0xED -> ARABIC LETTER WAW - u'\u0649' # 0xEE -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEF -> ARABIC LETTER YEH - u'\u2261' # 0xF0 -> IDENTICAL TO - u'\u064b' # 0xF1 -> ARABIC FATHATAN - u'\u064c' # 0xF2 -> ARABIC DAMMATAN - u'\u064d' # 0xF3 -> ARABIC KASRATAN - u'\u064e' # 0xF4 -> ARABIC 
FATHA - u'\u064f' # 0xF5 -> ARABIC DAMMA - u'\u0650' # 0xF6 -> ARABIC KASRA - u'\u2248' # 0xF7 -> ALMOST EQUAL TO - u'\xb0' # 0xF8 -> DEGREE SIGN - u'\u2219' # 0xF9 -> BULLET OPERATOR - u'\xb7' # 0xFA -> MIDDLE DOT - u'\u221a' # 0xFB -> SQUARE ROOT - u'\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0xFD -> SUPERSCRIPT TWO - u'\u25a0' # 0xFE -> BLACK SQUARE - u'\xa0' # 0xFF -> NO-BREAK SPACE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp737.py b/python/Lib/encodings/cp737.py deleted file mode 100755 index d6544482d2..0000000000 --- a/python/Lib/encodings/cp737.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec cp737 generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp737', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA - 0x0082: 
0x0393, # GREEK CAPITAL LETTER GAMMA - 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA - 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA - 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x008b: 0x039c, # GREEK CAPITAL LETTER MU - 0x008c: 0x039d, # GREEK CAPITAL LETTER NU - 0x008d: 0x039e, # GREEK CAPITAL LETTER XI - 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI - 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x0099: 0x03b2, # GREEK SMALL LETTER BETA - 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA - 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA - 0x009e: 0x03b7, # GREEK SMALL LETTER ETA - 0x009f: 0x03b8, # GREEK SMALL LETTER THETA - 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00a3: 0x03bc, # GREEK SMALL LETTER MU - 0x00a4: 0x03bd, # GREEK SMALL LETTER NU - 0x00a5: 0x03be, # GREEK SMALL LETTER XI - 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00a7: 0x03c0, # GREEK SMALL LETTER PI - 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO - 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ae: 
0x03c7, # GREEK SMALL LETTER CHI - 0x00af: 0x03c8, # GREEK SMALL LETTER PSI - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, 
# BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST 
EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU - u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO - u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 
0x0092 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA - u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x009c -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU - u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU - u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI - u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI - u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO - u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA - u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU - u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI - u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI - u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX 
DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA - u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - 
-encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 
0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 
0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00f7: 0x00f6, # DIVISION SIGN - 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA - 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA - 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA - 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA - 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0x008b, # GREEK CAPITAL LETTER MU - 0x039d: 0x008c, # GREEK CAPITAL LETTER NU - 0x039e: 0x008d, # GREEK CAPITAL LETTER XI - 
0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI - 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO - 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU - 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI - 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI - 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI - 0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA - 0x03b2: 0x0099, # GREEK SMALL LETTER BETA - 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA - 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA - 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON - 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA - 0x03b7: 0x009e, # GREEK SMALL LETTER ETA - 0x03b8: 0x009f, # GREEK SMALL LETTER THETA - 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA - 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA - 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA - 0x03bc: 0x00a3, # GREEK SMALL LETTER MU - 0x03bd: 0x00a4, # GREEK SMALL LETTER NU - 0x03be: 0x00a5, # GREEK SMALL LETTER XI - 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON - 0x03c0: 0x00a7, # GREEK SMALL LETTER PI - 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO - 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU - 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON - 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI - 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI - 0x03c8: 0x00af, # GREEK SMALL LETTER PSI - 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA - 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0x00e8, # 
GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 
0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp775.py b/python/Lib/encodings/cp775.py deleted file mode 100755 index 6a456a5825..0000000000 --- a/python/Lib/encodings/cp775.py +++ /dev/null @@ -1,697 +0,0 @@ -""" Python Character Mapping Codec cp775 generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp775', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # 
LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0096: 0x00a2, # CENT SIGN - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a7: 0x00a6, # BROKEN BAR - 0x00a8: 0x00a9, # COPYRIGHT SIGN - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE 
VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - 
u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE - u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE - u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON - u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA - u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON - u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\xa2' # 0x0096 -> CENT SIGN - u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE - u'\xd6' # 0x0099 -> LATIN 
CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\xa4' # 0x009f -> CURRENCY SIGN - u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE - u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK - u'\xa6' # 0x00a7 -> BROKEN BAR - u'\xa9' # 0x00a8 -> COPYRIGHT SIGN - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON - u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND 
LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON - u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON - u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON - u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK - u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON - u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\u0143' # 0x00e3 -> LATIN 
CAPITAL LETTER N WITH ACUTE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA - u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA - u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL 
TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 
0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 
0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a2: 0x0096, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x009f, # CURRENCY SIGN - 0x00a6: 0x00a7, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a9: 0x00a8, # COPYRIGHT SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH 
STROKE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA - 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA - 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA - 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA - 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0x0098, # LATIN SMALL 
LETTER S WITH ACUTE - 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON - 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON - 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK - 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK - 0x2219: 0x00f9, # BULLET OPERATOR - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 
0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp850.py b/python/Lib/encodings/cp850.py deleted file mode 100755 index 0c8478c8b2..0000000000 --- a/python/Lib/encodings/cp850.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp850', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 
0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 
0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN 
CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE 
- 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING 
ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH 
CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH - u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND 
RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN - u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2017' # 0x00f2 -> DOUBLE LOW LINE - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # 
LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # 
LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL 
LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL 
LETTER C WITH CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I 
WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x2017: 0x00f2, # DOUBLE LOW LINE - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp852.py b/python/Lib/encodings/cp852.py deleted file mode 100755 index 069d5473b5..0000000000 --- a/python/Lib/encodings/cp852.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp852', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 
0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 
- 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT - 
0x00f2: 0x02db, # OGONEK - 0x00f3: 0x02c7, # CARON - 0x00f4: 0x02d8, # BREVE - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' 
# 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u0142' # 0x0088 -> LATIN SMALL LETTER L 
WITH STROKE - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON - u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON - u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON - u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON - u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK - u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON - u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK - u'\xac' # 0x00aa -> NOT SIGN - u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE - u'\u010c' 
# 0x00ac -> LATIN CAPITAL LETTER C WITH CARON - u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON - u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE - u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND 
HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON - u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON - u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON - u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE - u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT - u'\u02db' # 0x00f2 -> OGONEK - u'\u02c7' # 0x00f3 -> CARON - u'\u02d8' # 0x00f4 -> BREVE - u'\xa7' # 0x00f5 -> 
SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\u02d9' # 0x00fa -> DOT ABOVE - u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON - u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 
0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL 
LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b8: 0x00f7, # CEDILLA - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH 
ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON - 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON - 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0x00a9, # 
LATIN SMALL LETTER E WITH OGONEK - 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON - 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE - 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON - 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON - 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE - 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON - 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE - 
0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0x00f3, # CARON - 0x02d8: 0x00f4, # BREVE - 0x02d9: 0x00fa, # DOT ABOVE - 0x02db: 0x00f2, # OGONEK - 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp855.py b/python/Lib/encodings/cp855.py deleted file mode 100755 index 241ef9d1e8..0000000000 --- a/python/Lib/encodings/cp855.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with 
gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp855', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO - 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI - 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE - 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x0091: 0x0409, # CYRILLIC 
CAPITAL LETTER LJE - 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU - 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE 
DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e4: 
0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00ef: 0x2116, # NUMERO SIGN - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E - 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - 
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE - u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE - u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE - u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE - u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO - u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO - u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE - u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE - u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI - u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI - u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE - u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE - u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE - u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE - u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE - u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE - u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE - u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE - u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE - u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE - u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U - u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045f' # 0x009a -> CYRILLIC 
SMALL LETTER DZHE - u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE - u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU - u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU - u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A - u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A - u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE - u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE - u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE - u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE - u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE - u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE - u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE - u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE - u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF - u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF - u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE - u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA - u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA - u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I - u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I - u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' 
# 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA - u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL - u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL - u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM - u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM - u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN - u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN - u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O - u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O - u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE - u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA - u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER - u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER - u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES - u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES - u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE - u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE - u'\u0443' 
# 0x00e7 -> CYRILLIC SMALL LETTER U - u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U - u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE - u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE - u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE - u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE - u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u2116' # 0x00ef -> NUMERO SIGN - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU - u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU - u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE - u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE - u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA - u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA - u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E - u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E - u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA - u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE - u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE - u'\xa7' # 0x00fd -> SECTION SIGN - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 
0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 
0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 
0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a7: 0x00fd, # SECTION SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE - 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE - 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE - 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE - 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U - 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O - 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0x00a5, # CYRILLIC 
CAPITAL LETTER TSE - 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E - 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A - 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE - 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE - 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE - 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE - 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE - 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE - 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I - 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA - 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL - 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM - 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN - 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O - 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE - 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER - 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES - 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE - 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U - 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF - 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA - 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE - 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE - 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA - 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU - 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E - 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU - 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA - 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO - 0x0452: 0x0080, # CYRILLIC 
SMALL LETTER DJE - 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE - 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE - 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI - 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE - 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE - 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE - 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE - 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE - 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U - 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE - 0x2116: 0x00ef, # NUMERO SIGN - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 
0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp856.py b/python/Lib/encodings/cp856.py deleted file mode 100755 index 203c2c4ca0..0000000000 --- a/python/Lib/encodings/cp856.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp856 generated from 'MAPPINGS/VENDORS/MISC/CP856.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp856', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK 
ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u05d0' # 0x80 -> HEBREW LETTER ALEF - u'\u05d1' # 0x81 -> HEBREW LETTER BET - u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x83 -> HEBREW LETTER DALET - u'\u05d4' # 0x84 -> HEBREW LETTER HE - u'\u05d5' # 0x85 -> HEBREW LETTER VAV - u'\u05d6' # 0x86 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x87 -> HEBREW LETTER HET - u'\u05d8' # 0x88 -> HEBREW LETTER TET - u'\u05d9' # 0x89 -> HEBREW LETTER YOD - u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x8B -> HEBREW LETTER KAF - u'\u05dc' # 0x8C -> HEBREW LETTER LAMED - u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x8E -> HEBREW LETTER MEM - u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x90 -> HEBREW LETTER NUN - u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0x92 -> HEBREW LETTER AYIN - u'\u05e3' # 0x93 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x94 -> HEBREW LETTER PE - u'\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x96 -> HEBREW LETTER TSADI - u'\u05e7' # 0x97 -> HEBREW LETTER QOF - u'\u05e8' # 0x98 -> HEBREW LETTER RESH - u'\u05e9' # 0x99 -> HEBREW LETTER SHIN - u'\u05ea' # 0x9A -> HEBREW LETTER TAV - u'\ufffe' # 0x9B -> UNDEFINED - u'\xa3' # 0x9C -> POUND SIGN - u'\ufffe' # 0x9D -> UNDEFINED - u'\xd7' # 0x9E -> MULTIPLICATION SIGN - u'\ufffe' # 0x9F -> UNDEFINED - u'\ufffe' # 0xA0 -> UNDEFINED - u'\ufffe' # 0xA1 -> UNDEFINED - u'\ufffe' # 0xA2 -> UNDEFINED - u'\ufffe' # 0xA3 -> UNDEFINED - u'\ufffe' # 0xA4 -> UNDEFINED - u'\ufffe' # 0xA5 -> UNDEFINED - u'\ufffe' # 0xA6 -> UNDEFINED - u'\ufffe' # 0xA7 -> UNDEFINED - 
u'\ufffe' # 0xA8 -> UNDEFINED - u'\xae' # 0xA9 -> REGISTERED SIGN - u'\xac' # 0xAA -> NOT SIGN - u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF - u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER - u'\ufffe' # 0xAD -> UNDEFINED - u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0xB0 -> LIGHT SHADE - u'\u2592' # 0xB1 -> MEDIUM SHADE - u'\u2593' # 0xB2 -> DARK SHADE - u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\ufffe' # 0xB5 -> UNDEFINED - u'\ufffe' # 0xB6 -> UNDEFINED - u'\ufffe' # 0xB7 -> UNDEFINED - u'\xa9' # 0xB8 -> COPYRIGHT SIGN - u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0xBD -> CENT SIGN - u'\xa5' # 0xBE -> YEN SIGN - u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\ufffe' # 0xC6 -> UNDEFINED - u'\ufffe' # 0xC7 -> UNDEFINED - u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0xCF -> CURRENCY SIGN - u'\ufffe' # 0xD0 -> UNDEFINED - u'\ufffe' # 0xD1 -> UNDEFINED - u'\ufffe' # 0xD2 -> UNDEFINED - u'\ufffe' # 0xD3 
-> UNDEFINEDS - u'\ufffe' # 0xD4 -> UNDEFINED - u'\ufffe' # 0xD5 -> UNDEFINED - u'\ufffe' # 0xD6 -> UNDEFINEDE - u'\ufffe' # 0xD7 -> UNDEFINED - u'\ufffe' # 0xD8 -> UNDEFINED - u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0xDB -> FULL BLOCK - u'\u2584' # 0xDC -> LOWER HALF BLOCK - u'\xa6' # 0xDD -> BROKEN BAR - u'\ufffe' # 0xDE -> UNDEFINED - u'\u2580' # 0xDF -> UPPER HALF BLOCK - u'\ufffe' # 0xE0 -> UNDEFINED - u'\ufffe' # 0xE1 -> UNDEFINED - u'\ufffe' # 0xE2 -> UNDEFINED - u'\ufffe' # 0xE3 -> UNDEFINED - u'\ufffe' # 0xE4 -> UNDEFINED - u'\ufffe' # 0xE5 -> UNDEFINED - u'\xb5' # 0xE6 -> MICRO SIGN - u'\ufffe' # 0xE7 -> UNDEFINED - u'\ufffe' # 0xE8 -> UNDEFINED - u'\ufffe' # 0xE9 -> UNDEFINED - u'\ufffe' # 0xEA -> UNDEFINED - u'\ufffe' # 0xEB -> UNDEFINED - u'\ufffe' # 0xEC -> UNDEFINED - u'\ufffe' # 0xED -> UNDEFINED - u'\xaf' # 0xEE -> MACRON - u'\xb4' # 0xEF -> ACUTE ACCENT - u'\xad' # 0xF0 -> SOFT HYPHEN - u'\xb1' # 0xF1 -> PLUS-MINUS SIGN - u'\u2017' # 0xF2 -> DOUBLE LOW LINE - u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0xF4 -> PILCROW SIGN - u'\xa7' # 0xF5 -> SECTION SIGN - u'\xf7' # 0xF6 -> DIVISION SIGN - u'\xb8' # 0xF7 -> CEDILLA - u'\xb0' # 0xF8 -> DEGREE SIGN - u'\xa8' # 0xF9 -> DIAERESIS - u'\xb7' # 0xFA -> MIDDLE DOT - u'\xb9' # 0xFB -> SUPERSCRIPT ONE - u'\xb3' # 0xFC -> SUPERSCRIPT THREE - u'\xb2' # 0xFD -> SUPERSCRIPT TWO - u'\u25a0' # 0xFE -> BLACK SQUARE - u'\xa0' # 0xFF -> NO-BREAK SPACE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp857.py b/python/Lib/encodings/cp857.py deleted file mode 100755 index c24191b04d..0000000000 --- a/python/Lib/encodings/cp857.py +++ /dev/null @@ -1,694 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp857', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 
0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 
0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: None, # UNDEFINED - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: 0x00d7, # MULTIPLICATION SIGN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: None, # UNDEFINED - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL 
THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE - u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 
-> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR - u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\ufffe' # 0x00d5 -> UNDEFINED - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - 
u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\ufffe' # 0x00e7 -> UNDEFINED - u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE - u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\ufffe' # 0x00f2 -> UNDEFINED - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # 
CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # 
LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND 
SIGN - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH 
TILDE - 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x00e8, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN 
SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I - 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git 
a/python/Lib/encodings/cp858.py b/python/Lib/encodings/cp858.py deleted file mode 100644 index 7ba7621f8f..0000000000 --- a/python/Lib/encodings/cp858.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec for CP858, modified from cp850. - -""" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp858', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL 
LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x20ac, # EURO SIGN - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX 
DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> 
VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT 
- u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH - u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\u20ac' # 0x00d5 -> EURO SIGN - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER 
O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN - u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2017' # 0x00f2 -> DOUBLE LOW LINE - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 
0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER 
Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00cf, # 
CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH - 0x00d1: 0x00a5, # LATIN 
CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 
0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x20ac: 0x00d5, # EURO SIGN - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x2017: 0x00f2, # DOUBLE LOW LINE - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 
0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp860.py b/python/Lib/encodings/cp860.py deleted file mode 100755 index 4acb0cf362..0000000000 --- a/python/Lib/encodings/cp860.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp860', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x008a: 0x00e8, # 
LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK 
SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT 
DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 
0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd5' # 
0x0099 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' 
# 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # 
DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER 
J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT 
CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # 
LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT 
DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS 
DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp861.py b/python/Lib/encodings/cp861.py deleted file mode 100755 index 0939b5b1ee..0000000000 --- a/python/Lib/encodings/cp861.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp861', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 
- 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH - 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00fe, # LATIN SMALL LETTER THORN - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a7: 0x00da, # LATIN 
CAPITAL LETTER U WITH ACUTE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 
0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH - u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH - u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN 
CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT 
SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN 
DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # 
START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 
0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # 
LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a3: 0x009c, # POUND SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH - 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x008d, # LATIN CAPITAL 
LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f0: 0x008c, # LATIN SMALL LETTER ETH - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x0095, # LATIN SMALL LETTER THORN - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE 
ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp862.py b/python/Lib/encodings/cp862.py deleted file mode 100755 index ea0405ca1b..0000000000 --- a/python/Lib/encodings/cp862.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp862', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 
0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # 
BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK 
CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK 
- u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF - u'\u05d1' # 0x0081 -> HEBREW LETTER BET - u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x0083 -> HEBREW LETTER DALET - u'\u05d4' # 0x0084 -> HEBREW LETTER HE - u'\u05d5' # 0x0085 -> HEBREW LETTER VAV - u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x0087 -> HEBREW LETTER HET - u'\u05d8' # 0x0088 -> HEBREW LETTER TET - u'\u05d9' # 0x0089 -> HEBREW LETTER YOD - u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x008b -> HEBREW LETTER KAF - u'\u05dc' # 0x008c -> HEBREW LETTER LAMED - u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x008e -> HEBREW LETTER MEM - u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x0090 -> HEBREW LETTER NUN - u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN - u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x0094 -> HEBREW LETTER PE - u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI - u'\u05e7' # 0x0097 -> HEBREW LETTER QOF - u'\u05e8' # 0x0098 -> HEBREW LETTER RESH - u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN - u'\u05ea' # 0x009a -> HEBREW LETTER TAV - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xa5' # 0x009d -> YEN SIGN - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - 
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS 
LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 
0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # 
ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN 
CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a5: 0x009d, # YEN SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 
0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x05d0: 0x0080, # HEBREW LETTER ALEF - 0x05d1: 0x0081, # HEBREW LETTER BET - 0x05d2: 0x0082, # HEBREW LETTER GIMEL - 0x05d3: 0x0083, # HEBREW LETTER DALET - 0x05d4: 0x0084, # HEBREW LETTER HE - 0x05d5: 0x0085, # HEBREW LETTER VAV - 0x05d6: 0x0086, # HEBREW LETTER ZAYIN - 0x05d7: 0x0087, # HEBREW LETTER HET - 0x05d8: 0x0088, # HEBREW LETTER TET - 0x05d9: 0x0089, # HEBREW LETTER YOD - 0x05da: 0x008a, # HEBREW LETTER FINAL KAF - 0x05db: 0x008b, # HEBREW LETTER KAF - 0x05dc: 0x008c, # HEBREW LETTER LAMED 
- 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM - 0x05de: 0x008e, # HEBREW LETTER MEM - 0x05df: 0x008f, # HEBREW LETTER FINAL NUN - 0x05e0: 0x0090, # HEBREW LETTER NUN - 0x05e1: 0x0091, # HEBREW LETTER SAMEKH - 0x05e2: 0x0092, # HEBREW LETTER AYIN - 0x05e3: 0x0093, # HEBREW LETTER FINAL PE - 0x05e4: 0x0094, # HEBREW LETTER PE - 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI - 0x05e6: 0x0096, # HEBREW LETTER TSADI - 0x05e7: 0x0097, # HEBREW LETTER QOF - 0x05e8: 0x0098, # HEBREW LETTER RESH - 0x05e9: 0x0099, # HEBREW LETTER SHIN - 0x05ea: 0x009a, # HEBREW LETTER TAV - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp863.py b/python/Lib/encodings/cp863.py deleted file mode 100755 index 62dfabf66a..0000000000 --- a/python/Lib/encodings/cp863.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 
'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp863', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00b6, # PILCROW SIGN - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x2017, # DOUBLE LOW LINE - 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x008f: 0x00a7, # SECTION SIGN - 0x0090: 0x00c9, # LATIN 
CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00a4, # CURRENCY SIGN - 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00a6, # BROKEN BAR - 0x00a1: 0x00b4, # ACUTE ACCENT - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00a8, # DIAERESIS - 0x00a5: 0x00b8, # CEDILLA - 0x00a6: 0x00b3, # SUPERSCRIPT THREE - 0x00a7: 0x00af, # MACRON - 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, 
# LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN 
- u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xb6' # 0x0086 -> PILCROW SIGN - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u2017' # 0x008d -> DOUBLE LOW LINE - u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa7' # 0x008f -> SECTION SIGN - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xa4' # 0x0098 -> CURRENCY SIGN - u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xdc' 
# 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xa6' # 0x00a0 -> BROKEN BAR - u'\xb4' # 0x00a1 -> ACUTE ACCENT - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xa8' # 0x00a4 -> DIAERESIS - u'\xb8' # 0x00a5 -> CEDILLA - u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE - u'\xaf' # 0x00a7 -> MACRON - u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX 
DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - 
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 
0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 
0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x0098, # CURRENCY SIGN - 0x00a6: 0x00a0, # BROKEN BAR - 0x00a7: 0x008f, # SECTION SIGN - 0x00a8: 0x00a4, # DIAERESIS - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00af: 0x00a7, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00a6, # SUPERSCRIPT THREE - 0x00b4: 0x00a1, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x0086, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00a5, # CEDILLA - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS - 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E 
WITH DIAERESIS - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x2017: 0x008d, # DOUBLE LOW LINE - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # 
LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp864.py b/python/Lib/encodings/cp864.py deleted file mode 100755 index 02a0e733a8..0000000000 --- a/python/Lib/encodings/cp864.py +++ /dev/null @@ -1,690 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp864', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0025: 0x066a, # ARABIC PERCENT SIGN - 0x0080: 0x00b0, # DEGREE SIGN - 0x0081: 0x00b7, # MIDDLE DOT - 0x0082: 0x2219, # BULLET OPERATOR - 0x0083: 0x221a, # SQUARE ROOT - 0x0084: 0x2592, # MEDIUM SHADE - 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL - 0x0086: 0x2502, # FORMS LIGHT VERTICAL - 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL 
- 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT - 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL - 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT - 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL - 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT - 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT - 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT - 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT - 0x0090: 0x03b2, # GREEK SMALL BETA - 0x0091: 0x221e, # INFINITY - 0x0092: 0x03c6, # GREEK SMALL PHI - 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN - 0x0094: 0x00bd, # FRACTION 1/2 - 0x0095: 0x00bc, # FRACTION 1/4 - 0x0096: 0x2248, # ALMOST EQUAL TO - 0x0097: 0x00ab, # LEFT POINTING GUILLEMET - 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET - 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM - 0x009f: None, # UNDEFINED - 0x00a1: 0x00ad, # SOFT HYPHEN - 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO - 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE - 0x00b6: 0x0666, # 
ARABIC-INDIC DIGIT SIX - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE - 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x00a2, # CENT SIGN - 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00db: 0x00a6, # BROKEN VERTICAL BAR - 0x00dc: 0x00ac, # NOT SIGN - 0x00dd: 0x00f7, # 
DIVISION SIGN - 0x00de: 0x00d7, # MULTIPLICATION SIGN - 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM - 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM - 0x00f1: 0x0651, # ARABIC SHADDAH - 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM - 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM - 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: None, # UNDEFINED -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> 
ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xb0' # 0x0080 -> DEGREE SIGN - u'\xb7' # 0x0081 -> MIDDLE DOT - u'\u2219' # 0x0082 -> BULLET OPERATOR - u'\u221a' # 0x0083 -> SQUARE ROOT - u'\u2592' # 0x0084 -> MEDIUM SHADE - u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL - u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL - u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL - u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT - u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL - u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT - u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT - u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT - u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT - u'\u03b2' # 0x0090 -> GREEK SMALL BETA - u'\u221e' # 0x0091 -> INFINITY - u'\u03c6' # 0x0092 -> GREEK SMALL PHI - u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN - u'\xbd' # 0x0094 -> FRACTION 1/2 - u'\xbc' # 0x0095 -> FRACTION 1/4 - u'\u2248' # 0x0096 -> ALMOST EQUAL TO - u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET - u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET - u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - u'\ufffe' # 0x009b -> UNDEFINED - u'\ufffe' # 0x009c -> UNDEFINED - u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - u'\ufefc' # 
0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM - u'\ufffe' # 0x009f -> UNDEFINED - u'\xa0' # 0x00a0 -> NON-BREAKING SPACE - u'\xad' # 0x00a1 -> SOFT HYPHEN - u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - u'\xa3' # 0x00a3 -> POUND SIGN - u'\xa4' # 0x00a4 -> CURRENCY SIGN - u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - u'\ufffe' # 0x00a6 -> UNDEFINED - u'\ufffe' # 0x00a7 -> UNDEFINED - u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM - u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM - u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM - u'\u060c' # 0x00ac -> ARABIC COMMA - u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM - u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM - u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM - u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO - u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE - u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO - u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE - u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR - u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE - u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX - u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN - u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT - u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE - u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM - u'\u061b' # 0x00bb -> ARABIC SEMICOLON - u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM - u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM - u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM - u'\u061f' # 0x00bf -> ARABIC QUESTION MARK - u'\xa2' # 0x00c0 -> CENT SIGN - u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM - u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - u'\ufeca' # 0x00c5 -> 
ARABIC LETTER AIN FINAL FORM - u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM - u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM - u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM - u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM - u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM - u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM - u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM - u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM - u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM - u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM - u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM - u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM - u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM - u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM - u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM - u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM - u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM - u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM - u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM - u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM - u'\xa6' # 0x00db -> BROKEN VERTICAL BAR - u'\xac' # 0x00dc -> NOT SIGN - u'\xf7' # 0x00dd -> DIVISION SIGN - u'\xd7' # 0x00de -> MULTIPLICATION SIGN - u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM - u'\u0640' # 0x00e0 -> ARABIC TATWEEL - u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM - u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM - u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM - u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM - u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM - u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM - u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM - u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM - u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM - u'\ufef3' # 
0x00ea -> ARABIC LETTER YEH INITIAL FORM - u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM - u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM - u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM - u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM - u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM - u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM - u'\u0651' # 0x00f1 -> ARABIC SHADDAH - u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM - u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM - u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM - u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM - u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM - u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM - u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM - u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM - u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM - u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\ufffe' # 0x00ff -> UNDEFINED -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE 
ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN 
CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00a0, # NON-BREAKING SPACE - 0x00a2: 0x00c0, # 
CENT SIGN - 0x00a3: 0x00a3, # POUND SIGN - 0x00a4: 0x00a4, # CURRENCY SIGN - 0x00a6: 0x00db, # BROKEN VERTICAL BAR - 0x00ab: 0x0097, # LEFT POINTING GUILLEMET - 0x00ac: 0x00dc, # NOT SIGN - 0x00ad: 0x00a1, # SOFT HYPHEN - 0x00b0: 0x0080, # DEGREE SIGN - 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN - 0x00b7: 0x0081, # MIDDLE DOT - 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET - 0x00bc: 0x0095, # FRACTION 1/4 - 0x00bd: 0x0094, # FRACTION 1/2 - 0x00d7: 0x00de, # MULTIPLICATION SIGN - 0x00f7: 0x00dd, # DIVISION SIGN - 0x03b2: 0x0090, # GREEK SMALL BETA - 0x03c6: 0x0092, # GREEK SMALL PHI - 0x060c: 0x00ac, # ARABIC COMMA - 0x061b: 0x00bb, # ARABIC SEMICOLON - 0x061f: 0x00bf, # ARABIC QUESTION MARK - 0x0640: 0x00e0, # ARABIC TATWEEL - 0x0651: 0x00f1, # ARABIC SHADDAH - 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO - 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE - 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO - 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE - 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR - 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE - 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX - 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN - 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT - 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE - 0x066a: 0x0025, # ARABIC PERCENT SIGN - 0x2219: 0x0082, # BULLET OPERATOR - 0x221a: 0x0083, # SQUARE ROOT - 0x221e: 0x0091, # INFINITY - 0x2248: 0x0096, # ALMOST EQUAL TO - 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL - 0x2502: 0x0086, # FORMS LIGHT VERTICAL - 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT - 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT - 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT - 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT - 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT - 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT - 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL - 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x2592: 0x0084, # MEDIUM SHADE - 0x25a0: 0x00fe, # BLACK SQUARE - 0xfe7d: 0x00f0, # ARABIC SHADDA 
MEDIAL FORM - 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM - 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM - 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM - 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM - 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM - 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM - 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM - 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM - 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM - 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM - 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM - 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM - 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM - 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM - 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM - 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM - 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM - 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM - 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM - 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM - 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM - 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM - 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM - 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM - 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM - 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM - 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM - 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM - 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM - 
0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM - 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM - 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM - 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM - 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM - 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM - 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM - 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM - 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM - 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM - 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM - 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM - 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM - 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM - 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM - 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM - 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM - 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM - 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM - 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM - 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM - 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM - 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM - 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM - 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM - 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM - 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM - 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM -} diff --git a/python/Lib/encodings/cp865.py 
b/python/Lib/encodings/cp865.py deleted file mode 100755 index e9f45f1b50..0000000000 --- a/python/Lib/encodings/cp865.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp865', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL 
LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00a4, # CURRENCY SIGN - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT 
VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX 
DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - 
u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xa4' # 0x00af -> CURRENCY SIGN - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> 
BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK 
SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 
0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # 
LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 
0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00af, # CURRENCY SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 
0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS 
LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX 
DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp866.py b/python/Lib/encodings/cp866.py deleted file mode 100755 index 29cd85a3f1..0000000000 --- a/python/Lib/encodings/cp866.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp866', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 
0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ac: 0x043c, # CYRILLIC 
SMALL LETTER EM - 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O - 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 
0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR 
- 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x2116, # NUMERO SIGN - 0x00fd: 0x00a4, # CURRENCY SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0x0091 -> CYRILLIC CAPITAL 
LETTER ES - u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O - u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN 
SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 
0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E - u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA - u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO - u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO - u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI - u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI - u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u2116' # 0x00fc -> NUMERO SIGN - u'\xa4' # 0x00fd -> CURRENCY SIGN - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING 
- 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # 
GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 
0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00fd, # CURRENCY SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO - 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI - 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U - 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O - 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x0095, 
# CYRILLIC CAPITAL LETTER HA - 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E - 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A - 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE - 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE - 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE - 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE - 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I - 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA - 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL - 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM - 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN - 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O - 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE - 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER - 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES - 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE - 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U - 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF - 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA - 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU - 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E - 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU - 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA - 0x0451: 
0x00f1, # CYRILLIC SMALL LETTER IO - 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI - 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U - 0x2116: 0x00fc, # NUMERO SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT 
SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp869.py b/python/Lib/encodings/cp869.py deleted file mode 100755 index b4dc99bf25..0000000000 --- a/python/Lib/encodings/cp869.py +++ /dev/null @@ -1,689 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp869', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: None, # UNDEFINED - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: None, # UNDEFINED - 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0087: None, # UNDEFINED - 0x0088: 0x00b7, # MIDDLE DOT - 0x0089: 0x00ac, # NOT SIGN - 0x008a: 0x00a6, # BROKEN BAR - 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x008e: 0x2015, # HORIZONTAL BAR - 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x0093: None, # UNDEFINED - 0x0094: None, # UNDEFINED - 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 
0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x0097: 0x00a9, # COPYRIGHT SIGN - 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0099: 0x00b2, # SUPERSCRIPT TWO - 0x009a: 0x00b3, # SUPERSCRIPT THREE - 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU - 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI - 0x00be: 0x039f, # GREEK CAPITAL LETTER 
OMICRON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA - 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00e6: 0x03bc, # GREEK SMALL LETTER MU - 0x00e7: 0x03bd, # GREEK SMALL LETTER NU - 
0x00e8: 0x03be, # GREEK SMALL LETTER XI - 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00ea: 0x03c0, # GREEK SMALL LETTER PI - 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO - 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ef: 0x0384, # GREEK TONOS - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 
0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' 
# 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL 
LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\ufffe' # 0x0080 -> UNDEFINED - u'\ufffe' # 0x0081 -> UNDEFINED - u'\ufffe' # 0x0082 -> UNDEFINED - u'\ufffe' # 0x0083 -> UNDEFINED - u'\ufffe' # 0x0084 -> UNDEFINED - u'\ufffe' # 0x0085 -> UNDEFINED - u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\ufffe' # 0x0087 -> UNDEFINED - u'\xb7' # 0x0088 -> MIDDLE DOT - u'\xac' # 0x0089 -> NOT SIGN - u'\xa6' # 0x008a -> BROKEN BAR - u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK - u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u2015' # 0x008e -> HORIZONTAL BAR - u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\ufffe' # 0x0093 -> UNDEFINED - u'\ufffe' # 0x0094 -> UNDEFINED - u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\xa9' # 0x0097 -> COPYRIGHT SIGN - u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\xb2' # 0x0099 -> SUPERSCRIPT TWO - u'\xb3' # 0x009a -> SUPERSCRIPT THREE - u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\xa3' # 0x009c -> POUND SIGN - u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0x009e -> GREEK SMALL 
LETTER ETA WITH TONOS - u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS 
LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA - u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU - u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU - u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI - u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0x00ea 
-> GREEK SMALL LETTER PI - u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO - u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA - u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU - u'\u0384' # 0x00ef -> GREEK TONOS - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI - u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA - u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 
0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL 
LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a3: 0x009c, # POUND SIGN - 0x00a6: 0x008a, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x0097, # COPYRIGHT SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK - 0x00ac: 0x0089, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x0099, # SUPERSCRIPT TWO - 0x00b3: 0x009a, # SUPERSCRIPT THREE - 0x00b7: 0x0088, # MIDDLE DOT - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x0384: 0x00ef, # GREEK TONOS - 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS - 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA - 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA - 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA - 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA - 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU - 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU - 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI - 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI - 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO - 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU - 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI - 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI - 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI - 0x03a9: 0x00d5, # 
GREEK CAPITAL LETTER OMEGA - 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA - 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA - 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA - 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON - 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA - 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA - 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA - 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA - 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA - 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA - 0x03bc: 0x00e6, # GREEK SMALL LETTER MU - 0x03bd: 0x00e7, # GREEK SMALL LETTER NU - 0x03be: 0x00e8, # GREEK SMALL LETTER XI - 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON - 0x03c0: 0x00ea, # GREEK SMALL LETTER PI - 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO - 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU - 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON - 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI - 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI - 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI - 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA - 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0x008e, # HORIZONTAL BAR - 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK - 0x2500: 0x00c4, # 
BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -} diff --git a/python/Lib/encodings/cp874.py b/python/Lib/encodings/cp874.py deleted file mode 100755 index 6110f46e5b..0000000000 --- a/python/Lib/encodings/cp874.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp874 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp874', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B 
-> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\ufffe' # 0x82 -> UNDEFINED - u'\ufffe' # 0x83 -> UNDEFINED - u'\ufffe' # 0x84 -> UNDEFINED - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\ufffe' # 0x86 -> UNDEFINED - u'\ufffe' # 0x87 -> UNDEFINED - u'\ufffe' # 0x88 -> UNDEFINED - u'\ufffe' # 0x89 -> UNDEFINED - u'\ufffe' # 0x8A -> UNDEFINED - u'\ufffe' # 0x8B -> UNDEFINED - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\ufffe' # 0x99 -> UNDEFINED - u'\ufffe' # 0x9A -> UNDEFINED - u'\ufffe' # 0x9B -> UNDEFINED - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - 
u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - 
u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT - u'\ufffe' # 0xFC -> UNDEFINED - u'\ufffe' # 0xFD -> UNDEFINED - u'\ufffe' # 0xFE -> UNDEFINED - u'\ufffe' # 0xFF -> UNDEFINED -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp875.py b/python/Lib/encodings/cp875.py deleted file mode 100755 index 72b160b02f..0000000000 --- a/python/Lib/encodings/cp875.py +++ 
/dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp875 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp875', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - 
u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA - u'[' # 0x4A -> LEFT SQUARE BRACKET - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' 
# 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x53 -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x54 -> GREEK CAPITAL LETTER NU - u'\u039e' # 0x55 -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO - u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA - u']' # 0x5A -> RIGHT SQUARE BRACKET - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'|' # 0x6A -> VERTICAL LINE - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' 
# 0x6F -> QUESTION MARK - u'\xa8' # 0x70 -> DIAERESIS - u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\xa0' # 0x74 -> NO-BREAK SPACE - u'\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA - u'\u03bc' 
# 0x9F -> GREEK SMALL LETTER MU - u'\xb4' # 0xA0 -> ACUTE ACCENT - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU - u'\u03be' # 0xAB -> GREEK SMALL LETTER XI - u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI - u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA - u'\xa3' # 0xB0 -> POUND SIGN - u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA - u'\u0390' 
# 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK - u'\u2015' # 0xCF -> HORIZONTAL BAR - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb1' # 0xDA -> PLUS-MINUS SIGN - u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF - u'\x1a' # 0xDC -> SUBSTITUTE - u'\u0387' # 0xDD -> GREEK ANO TELEIA - u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK - u'\xa6' # 0xDF -> BROKEN BAR - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\x1a' # 0xE1 -> SUBSTITUTE - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xa7' # 0xEB -> SECTION SIGN - u'\x1a' # 0xEC -> SUBSTITUTE - u'\x1a' # 0xED -> SUBSTITUTE - u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xEF -> NOT SIGN - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xa9' # 0xFB -> COPYRIGHT SIGN - u'\x1a' # 0xFC -> SUBSTITUTE - u'\x1a' # 0xFD -> SUBSTITUTE - u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table 
-encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/cp932.py b/python/Lib/encodings/cp932.py deleted file mode 100755 index e01f59b719..0000000000 --- a/python/Lib/encodings/cp932.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# cp932.py: Python Unicode Codec for CP932 -# -# Written by Hye-Shik Chang -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('cp932') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='cp932', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/cp949.py b/python/Lib/encodings/cp949.py deleted file mode 100755 index 627c87125e..0000000000 --- a/python/Lib/encodings/cp949.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# cp949.py: Python Unicode Codec for CP949 -# -# Written by Hye-Shik Chang -# - -import _codecs_kr, codecs -import _multibytecodec as mbc - -codec = _codecs_kr.getcodec('cp949') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, 
codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='cp949', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/cp950.py b/python/Lib/encodings/cp950.py deleted file mode 100755 index 39eec5ed0d..0000000000 --- a/python/Lib/encodings/cp950.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# cp950.py: Python Unicode Codec for CP950 -# -# Written by Hye-Shik Chang -# - -import _codecs_tw, codecs -import _multibytecodec as mbc - -codec = _codecs_tw.getcodec('cp950') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='cp950', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/euc_jis_2004.py b/python/Lib/encodings/euc_jis_2004.py deleted file mode 100755 index 72b87aea68..0000000000 --- a/python/Lib/encodings/euc_jis_2004.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004 -# -# Written by Hye-Shik Chang -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('euc_jis_2004') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - 
codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_jis_2004', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/euc_jisx0213.py b/python/Lib/encodings/euc_jisx0213.py deleted file mode 100755 index cc47d04112..0000000000 --- a/python/Lib/encodings/euc_jisx0213.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213 -# -# Written by Hye-Shik Chang -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('euc_jisx0213') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_jisx0213', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/euc_jp.py b/python/Lib/encodings/euc_jp.py deleted file mode 100755 index 7bcbe4147f..0000000000 --- a/python/Lib/encodings/euc_jp.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# euc_jp.py: Python 
Unicode Codec for EUC_JP -# -# Written by Hye-Shik Chang -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('euc_jp') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_jp', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/euc_kr.py b/python/Lib/encodings/euc_kr.py deleted file mode 100755 index c1fb1260e8..0000000000 --- a/python/Lib/encodings/euc_kr.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# euc_kr.py: Python Unicode Codec for EUC_KR -# -# Written by Hye-Shik Chang -# - -import _codecs_kr, codecs -import _multibytecodec as mbc - -codec = _codecs_kr.getcodec('euc_kr') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_kr', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - 
streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/gb18030.py b/python/Lib/encodings/gb18030.py deleted file mode 100755 index 34fb6c366a..0000000000 --- a/python/Lib/encodings/gb18030.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# gb18030.py: Python Unicode Codec for GB18030 -# -# Written by Hye-Shik Chang -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('gb18030') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='gb18030', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/gb2312.py b/python/Lib/encodings/gb2312.py deleted file mode 100755 index 3c3b837d61..0000000000 --- a/python/Lib/encodings/gb2312.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# gb2312.py: Python Unicode Codec for GB2312 -# -# Written by Hye-Shik Chang -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('gb2312') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, 
codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='gb2312', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/gbk.py b/python/Lib/encodings/gbk.py deleted file mode 100755 index 1b45db8985..0000000000 --- a/python/Lib/encodings/gbk.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# gbk.py: Python Unicode Codec for GBK -# -# Written by Hye-Shik Chang -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('gbk') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='gbk', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/hex_codec.py b/python/Lib/encodings/hex_codec.py deleted file mode 100755 index 154488cd0a..0000000000 --- a/python/Lib/encodings/hex_codec.py +++ /dev/null @@ -1,80 +0,0 @@ -""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). 
- -""" -import codecs, binascii - -### Codec APIs - -def hex_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = binascii.b2a_hex(input) - return (output, len(input)) - -def hex_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = binascii.a2b_hex(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input,errors='strict'): - return hex_encode(input,errors) - def decode(self, input,errors='strict'): - return hex_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - assert self.errors == 'strict' - return binascii.b2a_hex(input) - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - assert self.errors == 'strict' - return binascii.a2b_hex(input) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='hex', - encode=hex_encode, - decode=hex_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - ) diff --git a/python/Lib/encodings/hp_roman8.py 
b/python/Lib/encodings/hp_roman8.py deleted file mode 100755 index dbaaa72d76..0000000000 --- a/python/Lib/encodings/hp_roman8.py +++ /dev/null @@ -1,152 +0,0 @@ -""" Python Character Mapping Codec generated from 'hp_roman8.txt' with gencodec.py. - - Based on data from ftp://dkuug.dk/i18n/charmaps/HP-ROMAN8 (Keld Simonsen) - - Original source: LaserJet IIP Printer User's Manual HP part no - 33471-90901, Hewlet-Packard, June 1989. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='hp-roman8', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x00a1: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00a2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00a3: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00a4: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00a5: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00a6: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a7: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00a8: 0x00b4, # ACUTE ACCENT - 0x00a9: 0x02cb, # 
MODIFIER LETTER GRAVE ACCENT (Mandarin Chinese fourth tone) - 0x00aa: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00ab: 0x00a8, # DIAERESIS - 0x00ac: 0x02dc, # SMALL TILDE - 0x00ad: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ae: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00af: 0x20a4, # LIRA SIGN - 0x00b0: 0x00af, # MACRON - 0x00b1: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00b2: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00b3: 0x00b0, # DEGREE SIGN - 0x00b4: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00b5: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00b6: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00b7: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00b8: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00b9: 0x00bf, # INVERTED QUESTION MARK - 0x00ba: 0x00a4, # CURRENCY SIGN - 0x00bb: 0x00a3, # POUND SIGN - 0x00bc: 0x00a5, # YEN SIGN - 0x00bd: 0x00a7, # SECTION SIGN - 0x00be: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00bf: 0x00a2, # CENT SIGN - 0x00c0: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00c1: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00c2: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00c3: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00c4: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00c5: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x00c6: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00c7: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00c8: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x00c9: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x00ca: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x00cb: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x00cc: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00cd: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ce: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00cf: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00d0: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00d1: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00d2: 0x00d8, 
# LATIN CAPITAL LETTER O WITH STROKE - 0x00d3: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00d4: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00d5: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00d6: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00d7: 0x00e6, # LATIN SMALL LETTER AE - 0x00d8: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00d9: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x00da: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00db: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dc: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00dd: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00de: 0x00df, # LATIN SMALL LETTER SHARP S (German) - 0x00df: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e0: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e1: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00e2: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00e3: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00e4: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00e5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00e6: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00e7: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e8: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e9: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ea: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00eb: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00ec: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00ed: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ee: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00ef: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00f0: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00f1: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) - 0x00f2: 0x00b7, # MIDDLE DOT - 0x00f3: 0x00b5, # MICRO SIGN - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f6: 0x2014, # EM DASH - 0x00f7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00f8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00f9: 
0x00aa, # FEMININE ORDINAL INDICATOR - 0x00fa: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00fb: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00fc: 0x25a0, # BLACK SQUARE - 0x00fd: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00fe: 0x00b1, # PLUS-MINUS SIGN - 0x00ff: None, -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map) diff --git a/python/Lib/encodings/hz.py b/python/Lib/encodings/hz.py deleted file mode 100755 index 383442a3c9..0000000000 --- a/python/Lib/encodings/hz.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# hz.py: Python Unicode Codec for HZ -# -# Written by Hye-Shik Chang -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('hz') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='hz', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/idna.py b/python/Lib/encodings/idna.py deleted file mode 100755 index ea90d67142..0000000000 --- a/python/Lib/encodings/idna.py +++ /dev/null @@ -1,288 +0,0 @@ -# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) - -import stringprep, re, codecs -from unicodedata import ucd_3_2_0 as unicodedata - -# IDNA section 3.1 -dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]") - -# IDNA section 5 -ace_prefix = "xn--" -uace_prefix = unicode(ace_prefix, "ascii") - -# This assumes query 
strings, so AllowUnassigned is true -def nameprep(label): - # Map - newlabel = [] - for c in label: - if stringprep.in_table_b1(c): - # Map to nothing - continue - newlabel.append(stringprep.map_table_b2(c)) - label = u"".join(newlabel) - - # Normalize - label = unicodedata.normalize("NFKC", label) - - # Prohibit - for c in label: - if stringprep.in_table_c12(c) or \ - stringprep.in_table_c22(c) or \ - stringprep.in_table_c3(c) or \ - stringprep.in_table_c4(c) or \ - stringprep.in_table_c5(c) or \ - stringprep.in_table_c6(c) or \ - stringprep.in_table_c7(c) or \ - stringprep.in_table_c8(c) or \ - stringprep.in_table_c9(c): - raise UnicodeError("Invalid character %r" % c) - - # Check bidi - RandAL = map(stringprep.in_table_d1, label) - for c in RandAL: - if c: - # There is a RandAL char in the string. Must perform further - # tests: - # 1) The characters in section 5.8 MUST be prohibited. - # This is table C.8, which was already checked - # 2) If a string contains any RandALCat character, the string - # MUST NOT contain any LCat character. - if filter(stringprep.in_table_d2, label): - raise UnicodeError("Violation of BIDI requirement 2") - - # 3) If a string contains any RandALCat character, a - # RandALCat character MUST be the first character of the - # string, and a RandALCat character MUST be the last - # character of the string. - if not RandAL[0] or not RandAL[-1]: - raise UnicodeError("Violation of BIDI requirement 3") - - return label - -def ToASCII(label): - try: - # Step 1: try ASCII - label = label.encode("ascii") - except UnicodeError: - pass - else: - # Skip to step 3: UseSTD3ASCIIRules is false, so - # Skip to step 8. - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") - - # Step 2: nameprep - label = nameprep(label) - - # Step 3: UseSTD3ASCIIRules is false - # Step 4: try ASCII - try: - label = label.encode("ascii") - except UnicodeError: - pass - else: - # Skip to step 8. 
- if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") - - # Step 5: Check ACE prefix - if label.startswith(uace_prefix): - raise UnicodeError("Label starts with ACE prefix") - - # Step 6: Encode with PUNYCODE - label = label.encode("punycode") - - # Step 7: Prepend ACE prefix - label = ace_prefix + label - - # Step 8: Check size - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") - -def ToUnicode(label): - # Step 1: Check for ASCII - if isinstance(label, str): - pure_ascii = True - else: - try: - label = label.encode("ascii") - pure_ascii = True - except UnicodeError: - pure_ascii = False - if not pure_ascii: - # Step 2: Perform nameprep - label = nameprep(label) - # It doesn't say this, but apparently, it should be ASCII now - try: - label = label.encode("ascii") - except UnicodeError: - raise UnicodeError("Invalid character in IDN label") - # Step 3: Check for ACE prefix - if not label.startswith(ace_prefix): - return unicode(label, "ascii") - - # Step 4: Remove ACE prefix - label1 = label[len(ace_prefix):] - - # Step 5: Decode using PUNYCODE - result = label1.decode("punycode") - - # Step 6: Apply ToASCII - label2 = ToASCII(result) - - # Step 7: Compare the result of step 6 with the one of step 3 - # label2 will already be in lower case. - if label.lower() != label2: - raise UnicodeError("IDNA does not round-trip", label, label2) - - # Step 8: return the result of step 5 - return result - -### Codec APIs - -class Codec(codecs.Codec): - def encode(self,input,errors='strict'): - - if errors != 'strict': - # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) - - if not input: - return "", 0 - - result = [] - labels = dots.split(input) - if labels and len(labels[-1])==0: - trailing_dot = '.' 
- del labels[-1] - else: - trailing_dot = '' - for label in labels: - result.append(ToASCII(label)) - # Join with U+002E - return ".".join(result)+trailing_dot, len(input) - - def decode(self,input,errors='strict'): - - if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) - - if not input: - return u"", 0 - - # IDNA allows decoding to operate on Unicode strings, too. - if isinstance(input, unicode): - labels = dots.split(input) - else: - # Must be ASCII string - input = str(input) - unicode(input, "ascii") - labels = input.split(".") - - if labels and len(labels[-1]) == 0: - trailing_dot = u'.' - del labels[-1] - else: - trailing_dot = u'' - - result = [] - for label in labels: - result.append(ToUnicode(label)) - - return u".".join(result)+trailing_dot, len(input) - -class IncrementalEncoder(codecs.BufferedIncrementalEncoder): - def _buffer_encode(self, input, errors, final): - if errors != 'strict': - # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) - - if not input: - return ("", 0) - - labels = dots.split(input) - trailing_dot = u'' - if labels: - if not labels[-1]: - trailing_dot = '.' - del labels[-1] - elif not final: - # Keep potentially unfinished label until the next call - del labels[-1] - if labels: - trailing_dot = '.' - - result = [] - size = 0 - for label in labels: - result.append(ToASCII(label)) - if size: - size += 1 - size += len(label) - - # Join with U+002E - result = ".".join(result) + trailing_dot - size += len(trailing_dot) - return (result, size) - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def _buffer_decode(self, input, errors, final): - if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) - - if not input: - return (u"", 0) - - # IDNA allows decoding to operate on Unicode strings, too. 
- if isinstance(input, unicode): - labels = dots.split(input) - else: - # Must be ASCII string - input = str(input) - unicode(input, "ascii") - labels = input.split(".") - - trailing_dot = u'' - if labels: - if not labels[-1]: - trailing_dot = u'.' - del labels[-1] - elif not final: - # Keep potentially unfinished label until the next call - del labels[-1] - if labels: - trailing_dot = u'.' - - result = [] - size = 0 - for label in labels: - result.append(ToUnicode(label)) - if size: - size += 1 - size += len(label) - - result = u".".join(result) + trailing_dot - size += len(trailing_dot) - return (result, size) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='idna', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/iso2022_jp.py b/python/Lib/encodings/iso2022_jp.py deleted file mode 100755 index ab04060693..0000000000 --- a/python/Lib/encodings/iso2022_jp.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# iso2022_jp.py: Python Unicode Codec for ISO2022_JP -# -# Written by Hye-Shik Chang -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp', - 
encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/iso2022_jp_1.py b/python/Lib/encodings/iso2022_jp_1.py deleted file mode 100755 index 997044dc37..0000000000 --- a/python/Lib/encodings/iso2022_jp_1.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1 -# -# Written by Hye-Shik Chang -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_1') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_1', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/iso2022_jp_2.py b/python/Lib/encodings/iso2022_jp_2.py deleted file mode 100755 index 9106bf7625..0000000000 --- a/python/Lib/encodings/iso2022_jp_2.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2 -# -# Written by Hye-Shik Chang -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_2') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class 
IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_2', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/iso2022_jp_2004.py b/python/Lib/encodings/iso2022_jp_2004.py deleted file mode 100755 index 40198bf098..0000000000 --- a/python/Lib/encodings/iso2022_jp_2004.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004 -# -# Written by Hye-Shik Chang -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_2004') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_2004', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/iso2022_jp_3.py b/python/Lib/encodings/iso2022_jp_3.py deleted file mode 100755 index 346e08becc..0000000000 --- a/python/Lib/encodings/iso2022_jp_3.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# iso2022_jp_3.py: Python 
Unicode Codec for ISO2022_JP_3 -# -# Written by Hye-Shik Chang -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_3') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_3', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/iso2022_jp_ext.py b/python/Lib/encodings/iso2022_jp_ext.py deleted file mode 100755 index 752bab9813..0000000000 --- a/python/Lib/encodings/iso2022_jp_ext.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT -# -# Written by Hye-Shik Chang -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_ext') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_ext', - encode=Codec().encode, - decode=Codec().decode, - 
incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/iso2022_kr.py b/python/Lib/encodings/iso2022_kr.py deleted file mode 100755 index bf7018763e..0000000000 --- a/python/Lib/encodings/iso2022_kr.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# iso2022_kr.py: Python Unicode Codec for ISO2022_KR -# -# Written by Hye-Shik Chang -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_kr') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_kr', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/iso8859_1.py b/python/Lib/encodings/iso8859_1.py deleted file mode 100755 index 71bc13fcbb..0000000000 --- a/python/Lib/encodings/iso8859_1.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_1 generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-1', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA 
-> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) - 
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_10.py b/python/Lib/encodings/iso8859_10.py deleted file mode 100755 index 757e5c5eb9..0000000000 --- 
a/python/Lib/encodings/iso8859_10.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_10 generated from 'MAPPINGS/ISO8859/8859-10.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-10', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - 
u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0112' # 0xA2 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u012a' # 0xA4 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u0136' # 0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u013b' # 0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0110' # 0xA9 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0160' # 0xAA -> LATIN CAPITAL LETTER S WITH CARON - u'\u0166' # 0xAB -> LATIN CAPITAL LETTER T WITH STROKE - u'\u017d' # 0xAC -> LATIN CAPITAL LETTER Z WITH CARON - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u016a' # 0xAE -> LATIN CAPITAL LETTER U WITH MACRON - u'\u014a' # 0xAF -> LATIN CAPITAL LETTER ENG - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK 
- u'\u0113' # 0xB2 -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xB3 -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE - u'\u0137' # 0xB6 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u013c' # 0xB8 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0111' # 0xB9 -> LATIN SMALL LETTER D WITH STROKE - u'\u0161' # 0xBA -> LATIN SMALL LETTER S WITH CARON - u'\u0167' # 0xBB -> LATIN SMALL LETTER T WITH STROKE - u'\u017e' # 0xBC -> LATIN SMALL LETTER Z WITH CARON - u'\u2015' # 0xBD -> HORIZONTAL BAR - u'\u016b' # 0xBE -> LATIN SMALL LETTER U WITH MACRON - u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG - u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O 
WITH DIAERESIS - u'\u0168' # 0xD7 -> LATIN CAPITAL LETTER U WITH TILDE - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u0169' # 0xF7 -> LATIN SMALL LETTER U WITH TILDE - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 
0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) - u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_11.py b/python/Lib/encodings/iso8859_11.py deleted file mode 100755 index 27ece8dc7b..0000000000 --- a/python/Lib/encodings/iso8859_11.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_11 generated from 'MAPPINGS/ISO8859/8859-11.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-11', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> 
BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - 
u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 
-> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_13.py b/python/Lib/encodings/iso8859_13.py deleted file mode 100755 index 71adb5c19a..0000000000 --- a/python/Lib/encodings/iso8859_13.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_13 generated from 'MAPPINGS/ISO8859/8859-13.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-13', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 
-> MIDDLE DOT - u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xBF -> LATIN SMALL LETTER AE - u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xDC 
-> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON - u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK -) - -### Encoding table 
-encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_14.py b/python/Lib/encodings/iso8859_14.py deleted file mode 100755 index 56843d5fd0..0000000000 --- a/python/Lib/encodings/iso8859_14.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_14 generated from 'MAPPINGS/ISO8859/8859-14.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-14', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' 
# 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u1e02' # 0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE - u'\u1e03' # 0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE - u'\xa3' # 0xA3 -> POUND SIGN - u'\u010a' # 0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u010b' # 0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u1e0a' # 0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u1e80' # 0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u1e82' # 0xAA -> LATIN CAPITAL LETTER W WITH ACUTE - u'\u1e0b' # 0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE - u'\u1ef2' # 0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u0178' # 0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u1e1e' # 0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE - u'\u1e1f' # 0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE - u'\u0120' # 
0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE - u'\u0121' # 0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\u1e40' # 0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE - u'\u1e41' # 0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\u1e56' # 0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE - u'\u1e81' # 0xB8 -> LATIN SMALL LETTER W WITH GRAVE - u'\u1e57' # 0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE - u'\u1e83' # 0xBA -> LATIN SMALL LETTER W WITH ACUTE - u'\u1e60' # 0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE - u'\u1ef3' # 0xBC -> LATIN SMALL LETTER Y WITH GRAVE - u'\u1e84' # 0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS - u'\u1e85' # 0xBE -> LATIN SMALL LETTER W WITH DIAERESIS - u'\u1e61' # 0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0174' # 0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - 
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u1e6a' # 0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0176' # 0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0175' # 0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u1e6b' # 0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U 
WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0177' # 0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_15.py b/python/Lib/encodings/iso8859_15.py deleted file mode 100755 index 13b140ca3b..0000000000 --- a/python/Lib/encodings/iso8859_15.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_15 generated from 'MAPPINGS/ISO8859/8859-15.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-15', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 
-> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20ac' # 0xA4 -> EURO SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u017e' 
# 0xB8 -> LATIN SMALL LETTER Z WITH CARON - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN 
CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_16.py b/python/Lib/encodings/iso8859_16.py deleted file mode 100755 index 
00b9ac8055..0000000000 --- a/python/Lib/encodings/iso8859_16.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_16 generated from 'MAPPINGS/ISO8859/8859-16.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-16', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> 
NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u20ac' # 0xA4 -> EURO SIGN - u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017a' # 0xAE -> LATIN SMALL LETTER Z WITH ACUTE - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON - 
u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE - u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON - u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON - u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S 
WITH ACUTE - u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE - u'\u0171' # 0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC 
-> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK - u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_2.py b/python/Lib/encodings/iso8859_2.py deleted file mode 100755 index 38e91d8e17..0000000000 --- a/python/Lib/encodings/iso8859_2.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_2 generated from 'MAPPINGS/ISO8859/8859-2.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-2', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 
0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u02d8' # 0xA2 -> BREVE - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u013d' # 0xA5 -> LATIN CAPITAL LETTER L WITH CARON - u'\u015a' # 0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u0164' # 0xAB -> LATIN CAPITAL LETTER T WITH CARON - u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xB2 -> OGONEK - u'\u0142' # 0xB3 -> LATIN SMALL LETTER L 
WITH STROKE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\u013e' # 0xB5 -> LATIN SMALL LETTER L WITH CARON - u'\u015b' # 0xB6 -> LATIN SMALL LETTER S WITH ACUTE - u'\u02c7' # 0xB7 -> CARON - u'\xb8' # 0xB8 -> CEDILLA - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\u0165' # 0xBB -> LATIN SMALL LETTER T WITH CARON - u'\u017a' # 0xBC -> LATIN SMALL LETTER Z WITH ACUTE - u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xD9 
-> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH 
ACUTE - u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_3.py b/python/Lib/encodings/iso8859_3.py deleted file mode 100755 index 23daafdbb1..0000000000 --- a/python/Lib/encodings/iso8859_3.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_3 generated from 'MAPPINGS/ISO8859/8859-3.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-3', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE 
RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0126' # 0xA1 -> LATIN CAPITAL LETTER H WITH STROKE - u'\u02d8' # 0xA2 -> BREVE - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\ufffe' - u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX - u'\xad' # 0xAD -> SOFT HYPHEN - u'\ufffe' - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u0125' # 0xB6 -> LATIN SMALL 
LETTER H WITH CIRCUMFLEX - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE - u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\ufffe' - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\ufffe' - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\ufffe' - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u016c' # 0xDD -> LATIN CAPITAL 
LETTER U WITH BREVE - u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\ufffe' - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\ufffe' - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE - u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_4.py b/python/Lib/encodings/iso8859_4.py deleted file mode 100755 index c8e03b566a..0000000000 --- 
a/python/Lib/encodings/iso8859_4.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_4 generated from 'MAPPINGS/ISO8859/8859-4.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-4', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' 
# 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0138' # 0xA2 -> LATIN SMALL LETTER KRA - u'\u0156' # 0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u013b' # 0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0112' # 0xAA -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0166' # 0xAC -> LATIN CAPITAL LETTER T WITH STROKE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xB2 -> OGONEK - u'\u0157' # 0xB3 -> LATIN SMALL LETTER R WITH 
CEDILLA - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE - u'\u013c' # 0xB6 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u02c7' # 0xB7 -> CARON - u'\xb8' # 0xB8 -> CEDILLA - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u0113' # 0xBA -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xBB -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0167' # 0xBC -> LATIN SMALL LETTER T WITH STROKE - u'\u014a' # 0xBD -> LATIN CAPITAL LETTER ENG - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG - u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u012a' # 0xCF -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\u0136' # 0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xD9 -> LATIN 
CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0168' # 0xDD -> LATIN CAPITAL LETTER U WITH TILDE - u'\u016a' # 0xDE -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u012b' # 0xEF -> LATIN SMALL LETTER I WITH MACRON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON - u'\u0137' # 0xF3 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0169' # 0xFD -> LATIN SMALL LETTER U WITH TILDE - u'\u016b' # 0xFE -> 
LATIN SMALL LETTER U WITH MACRON - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_5.py b/python/Lib/encodings/iso8859_5.py deleted file mode 100755 index c01cd1caab..0000000000 --- a/python/Lib/encodings/iso8859_5.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_5 generated from 'MAPPINGS/ISO8859/8859-5.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-5', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> 
SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0401' # 0xA1 -> CYRILLIC CAPITAL LETTER IO - u'\u0402' # 0xA2 -> CYRILLIC CAPITAL LETTER DJE - u'\u0403' # 0xA3 -> CYRILLIC CAPITAL LETTER GJE - u'\u0404' # 0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0405' # 0xA5 -> CYRILLIC CAPITAL LETTER DZE - u'\u0406' # 0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xA7 -> CYRILLIC CAPITAL LETTER YI - u'\u0408' # 0xA8 -> CYRILLIC CAPITAL LETTER JE - u'\u0409' # 0xA9 -> CYRILLIC CAPITAL LETTER LJE - u'\u040a' # 0xAA -> CYRILLIC CAPITAL LETTER NJE - u'\u040b' # 0xAB -> CYRILLIC CAPITAL LETTER TSHE - u'\u040c' # 0xAC -> CYRILLIC CAPITAL LETTER KJE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u040e' # 0xAE -> CYRILLIC CAPITAL LETTER SHORT U - u'\u040f' # 0xAF -> CYRILLIC CAPITAL LETTER DZHE - u'\u0410' # 0xB0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xB1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xB2 -> 
CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xB3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xB4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xB5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xB6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0xB7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xB8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xB9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xBA -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xBB -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xBC -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xBD -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xBE -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xBF -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xC0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xC1 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xC2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xC3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xC4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xC5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xC6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xC7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xC8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xC9 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xCB -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xCD -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xCE -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xCF -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xD0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xD1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xD2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xD3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xD4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xD5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xD7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xD8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xD9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xDA -> CYRILLIC SMALL LETTER KA - 
u'\u043b' # 0xDB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xDC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xDD -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xDE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xDF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xE0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xE1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xE2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xE3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xE4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xE5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xE6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xE7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xE8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xE9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xEA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xEB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xEC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xED -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xEE -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xEF -> CYRILLIC SMALL LETTER YA - u'\u2116' # 0xF0 -> NUMERO SIGN - u'\u0451' # 0xF1 -> CYRILLIC SMALL LETTER IO - u'\u0452' # 0xF2 -> CYRILLIC SMALL LETTER DJE - u'\u0453' # 0xF3 -> CYRILLIC SMALL LETTER GJE - u'\u0454' # 0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0455' # 0xF5 -> CYRILLIC SMALL LETTER DZE - u'\u0456' # 0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xF7 -> CYRILLIC SMALL LETTER YI - u'\u0458' # 0xF8 -> CYRILLIC SMALL LETTER JE - u'\u0459' # 0xF9 -> CYRILLIC SMALL LETTER LJE - u'\u045a' # 0xFA -> CYRILLIC SMALL LETTER NJE - u'\u045b' # 0xFB -> CYRILLIC SMALL LETTER TSHE - u'\u045c' # 0xFC -> CYRILLIC SMALL LETTER KJE - u'\xa7' # 0xFD -> SECTION SIGN - u'\u045e' # 0xFE -> CYRILLIC SMALL LETTER SHORT U - u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_6.py b/python/Lib/encodings/iso8859_6.py deleted file mode 100755 index 16c34a3f61..0000000000 --- 
a/python/Lib/encodings/iso8859_6.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_6 generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-6', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' 
# 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u060c' # 0xAC -> ARABIC COMMA - u'\xad' # 0xAD -> SOFT HYPHEN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u061b' # 0xBB -> ARABIC SEMICOLON - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\ufffe' - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - 
u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\u0637' # 0xD7 -> ARABIC LETTER TAH - u'\u0638' # 0xD8 -> ARABIC LETTER ZAH - u'\u0639' # 0xD9 -> ARABIC LETTER AIN - u'\u063a' # 0xDA -> ARABIC LETTER GHAIN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u0640' # 0xE0 -> ARABIC TATWEEL - u'\u0641' # 0xE1 -> ARABIC LETTER FEH - u'\u0642' # 0xE2 -> ARABIC LETTER QAF - u'\u0643' # 0xE3 -> ARABIC LETTER KAF - u'\u0644' # 0xE4 -> ARABIC LETTER LAM - u'\u0645' # 0xE5 -> ARABIC LETTER MEEM - u'\u0646' # 0xE6 -> ARABIC LETTER NOON - u'\u0647' # 0xE7 -> ARABIC LETTER HEH - u'\u0648' # 0xE8 -> ARABIC LETTER WAW - u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEA -> ARABIC LETTER YEH - u'\u064b' # 0xEB -> ARABIC FATHATAN - u'\u064c' # 0xEC -> ARABIC DAMMATAN - u'\u064d' # 0xED -> ARABIC KASRATAN - u'\u064e' # 0xEE -> ARABIC FATHA - u'\u064f' # 0xEF -> ARABIC DAMMA - u'\u0650' # 0xF0 -> ARABIC KASRA - u'\u0651' # 0xF1 -> ARABIC SHADDA - u'\u0652' # 0xF2 -> ARABIC SUKUN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_7.py b/python/Lib/encodings/iso8859_7.py deleted file mode 100755 index a560023a08..0000000000 --- a/python/Lib/encodings/iso8859_7.py +++ /dev/null 
@@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_7 generated from 'MAPPINGS/ISO8859/8859-7.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-7', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END 
OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u2018' # 0xA1 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xA2 -> RIGHT SINGLE QUOTATION MARK - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20ac' # 0xA4 -> EURO SIGN - u'\u20af' # 0xA5 -> DRACHMA SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u037a' # 0xAA -> GREEK YPOGEGRAMMENI - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\ufffe' - u'\u2015' # 0xAF -> HORIZONTAL BAR - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u0384' # 0xB4 -> GREEK TONOS - u'\u0385' # 0xB5 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0388' # 0xB8 -> GREEK 
CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO - u'\ufffe' - u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 
0xDF -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU - u'\u03bd' # 0xED -> GREEK SMALL LETTER NU - u'\u03be' # 0xEE -> GREEK SMALL LETTER XI - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI - u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_8.py b/python/Lib/encodings/iso8859_8.py deleted file mode 100755 index 43cf2138b5..0000000000 --- a/python/Lib/encodings/iso8859_8.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_8 generated from 'MAPPINGS/ISO8859/8859-8.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-8', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xd7' # 0xAA -> MULTIPLICATION SIGN - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xf7' # 0xBA -> DIVISION SIGN - u'\xbb' # 0xBB -> 
RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u2017' # 0xDF -> DOUBLE LOW LINE - u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xE1 -> HEBREW LETTER BET - u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xE3 -> HEBREW LETTER DALET - u'\u05d4' # 0xE4 -> HEBREW LETTER HE - u'\u05d5' # 0xE5 -> HEBREW LETTER VAV - u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xE7 -> HEBREW LETTER HET - u'\u05d8' # 0xE8 -> HEBREW LETTER TET - u'\u05d9' # 0xE9 -> HEBREW LETTER YOD - u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xEB -> HEBREW LETTER KAF - u'\u05dc' # 0xEC -> HEBREW LETTER LAMED - u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xEE -> HEBREW LETTER MEM - u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xF0 -> HEBREW LETTER NUN - u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xF4 -> HEBREW LETTER PE - u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xF7 -> HEBREW LETTER QOF - u'\u05e8' # 0xF8 -> HEBREW LETTER RESH - u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xFA -> HEBREW LETTER TAV - u'\ufffe' - u'\ufffe' - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/iso8859_9.py 
b/python/Lib/encodings/iso8859_9.py deleted file mode 100755 index b8029382c0..0000000000 --- a/python/Lib/encodings/iso8859_9.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_9 generated from 'MAPPINGS/ISO8859/8859-9.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-9', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL 
THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA 
-> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - 
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/johab.py b/python/Lib/encodings/johab.py deleted file mode 100755 index 512aeeb732..0000000000 --- a/python/Lib/encodings/johab.py +++ /dev/null 
@@ -1,39 +0,0 @@ -# -# johab.py: Python Unicode Codec for JOHAB -# -# Written by Hye-Shik Chang -# - -import _codecs_kr, codecs -import _multibytecodec as mbc - -codec = _codecs_kr.getcodec('johab') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='johab', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/koi8_r.py b/python/Lib/encodings/koi8_r.py deleted file mode 100755 index f9eb82c0db..0000000000 --- a/python/Lib/encodings/koi8_r.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec koi8_r generated from 'MAPPINGS/VENDORS/MISC/KOI8-R.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='koi8-r', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL - u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8B -> UPPER HALF BLOCK - u'\u2584' # 0x8C -> LOWER HALF BLOCK - u'\u2588' # 0x8D -> FULL BLOCK - u'\u258c' # 0x8E -> LEFT HALF BLOCK - u'\u2590' # 0x8F -> RIGHT HALF BLOCK - u'\u2591' # 0x90 -> LIGHT SHADE - u'\u2592' # 0x91 -> MEDIUM SHADE - u'\u2593' # 0x92 -> DARK SHADE - u'\u2320' # 0x93 -> TOP HALF INTEGRAL - u'\u25a0' # 0x94 -> BLACK SQUARE - u'\u2219' # 0x95 -> BULLET OPERATOR - u'\u221a' # 0x96 -> SQUARE ROOT - u'\u2248' # 0x97 -> ALMOST EQUAL TO - u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9A -> NO-BREAK SPACE - u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9C -> DEGREE SIGN - u'\xb2' # 0x9D -> SUPERSCRIPT TWO - u'\xb7' # 0x9E -> MIDDLE DOT - u'\xf7' # 0x9F -> DIVISION SIGN - u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND 
RIGHT DOUBLE - u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO - u'\u2553' # 0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2555' # 0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2556' # 0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u255c' # 0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO - u'\u2562' # 0xB4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2564' # 0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u256b' # 0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xBF -> COPYRIGHT SIGN - u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU - u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' 
# 0xC4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xED -> 
CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/koi8_u.py b/python/Lib/encodings/koi8_u.py deleted file mode 100755 index a9317b12b7..0000000000 --- a/python/Lib/encodings/koi8_u.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec koi8_u generated from 'python-mappings/KOI8-U.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='koi8-u', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL - u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8B -> UPPER HALF BLOCK - u'\u2584' # 0x8C -> LOWER HALF BLOCK - u'\u2588' # 0x8D -> FULL BLOCK - u'\u258c' # 0x8E -> LEFT HALF BLOCK - u'\u2590' # 0x8F -> RIGHT HALF BLOCK - u'\u2591' # 0x90 -> LIGHT SHADE - u'\u2592' # 0x91 -> MEDIUM SHADE - u'\u2593' # 0x92 -> DARK SHADE - u'\u2320' # 0x93 -> TOP HALF INTEGRAL - u'\u25a0' # 0x94 -> BLACK SQUARE - u'\u2219' # 0x95 -> BULLET OPERATOR - u'\u221a' # 0x96 -> SQUARE ROOT - u'\u2248' # 0x97 -> ALMOST EQUAL TO - u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9A -> NO-BREAK SPACE - u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9C -> DEGREE SIGN - u'\xb2' # 0x9D -> SUPERSCRIPT TWO - u'\xb7' # 0x9E -> MIDDLE DOT - u'\xf7' # 0x9F -> DIVISION SIGN - u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND 
RIGHT DOUBLE - u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO - u'\u0454' # 0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u0456' # 0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) - u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u0491' # 0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN - u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO - u'\u0404' # 0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u0406' # 0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u0490' # 0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN - u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xBF -> COPYRIGHT SIGN - u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU - u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' # 0xC4 -> 
CYRILLIC SMALL LETTER DE - u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xED -> CYRILLIC CAPITAL 
LETTER EM - u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/latin_1.py b/python/Lib/encodings/latin_1.py deleted file mode 100755 index 370160c0cb..0000000000 --- a/python/Lib/encodings/latin_1.py +++ /dev/null @@ -1,50 +0,0 @@ -""" Python 'latin-1' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. 
- encode = codecs.latin_1_encode - decode = codecs.latin_1_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.latin_1_encode(input,self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.latin_1_decode(input,self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -class StreamConverter(StreamWriter,StreamReader): - - encode = codecs.latin_1_decode - decode = codecs.latin_1_encode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-1', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/mac_arabic.py b/python/Lib/encodings/mac_arabic.py deleted file mode 100755 index 7a7d3c5f7f..0000000000 --- a/python/Lib/encodings/mac_arabic.py +++ /dev/null @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ARABIC.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-arabic', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00a0, # NO-BREAK SPACE, right-left - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA - 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 
0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f7, # DIVISION SIGN, right-left - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x0020, # SPACE, right-left - 0x00a1: 0x0021, # EXCLAMATION MARK, right-left - 0x00a2: 0x0022, # QUOTATION MARK, right-left - 0x00a3: 0x0023, # NUMBER SIGN, right-left - 0x00a4: 0x0024, # DOLLAR SIGN, right-left - 0x00a5: 0x066a, # ARABIC PERCENT SIGN - 0x00a6: 0x0026, # AMPERSAND, right-left - 0x00a7: 0x0027, # APOSTROPHE, right-left - 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left - 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left - 0x00aa: 0x002a, # ASTERISK, right-left - 0x00ab: 0x002b, # PLUS SIGN, right-left - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0x002d, # HYPHEN-MINUS, right-left - 0x00ae: 0x002e, # FULL STOP, right-left - 0x00af: 0x002f, # SOLIDUS, right-left - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE, right-left (need 
override) - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x00ba: 0x003a, # COLON, right-left - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0x003c, # LESS-THAN SIGN, right-left - 0x00bd: 0x003d, # EQUALS SIGN, right-left - 0x00be: 0x003e, # GREATER-THAN SIGN, right-left - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d7: 0x0637, # ARABIC LETTER TAH - 0x00d8: 0x0638, # ARABIC LETTER ZAH - 0x00d9: 0x0639, # ARABIC LETTER AIN - 0x00da: 0x063a, # ARABIC LETTER GHAIN - 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left - 0x00dc: 0x005c, # REVERSE SOLIDUS, right-left - 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left - 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left - 0x00df: 0x005f, # LOW LINE, right-left - 0x00e0: 0x0640, # ARABIC 
TATWEEL - 0x00e1: 0x0641, # ARABIC LETTER FEH - 0x00e2: 0x0642, # ARABIC LETTER QAF - 0x00e3: 0x0643, # ARABIC LETTER KAF - 0x00e4: 0x0644, # ARABIC LETTER LAM - 0x00e5: 0x0645, # ARABIC LETTER MEEM - 0x00e6: 0x0646, # ARABIC LETTER NOON - 0x00e7: 0x0647, # ARABIC LETTER HEH - 0x00e8: 0x0648, # ARABIC LETTER WAW - 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ea: 0x064a, # ARABIC LETTER YEH - 0x00eb: 0x064b, # ARABIC FATHATAN - 0x00ec: 0x064c, # ARABIC DAMMATAN - 0x00ed: 0x064d, # ARABIC KASRATAN - 0x00ee: 0x064e, # ARABIC FATHA - 0x00ef: 0x064f, # ARABIC DAMMA - 0x00f0: 0x0650, # ARABIC KASRA - 0x00f1: 0x0651, # ARABIC SHADDA - 0x00f2: 0x0652, # ARABIC SUKUN - 0x00f3: 0x067e, # ARABIC LETTER PEH - 0x00f4: 0x0679, # ARABIC LETTER TTEH - 0x00f5: 0x0686, # ARABIC LETTER TCHEH - 0x00f6: 0x06d5, # ARABIC LETTER AE - 0x00f7: 0x06a4, # ARABIC LETTER VEH - 0x00f8: 0x06af, # ARABIC LETTER GAF - 0x00f9: 0x0688, # ARABIC LETTER DDAL - 0x00fa: 0x0691, # ARABIC LETTER RREH - 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left - 0x00fc: 0x007c, # VERTICAL LINE, right-left - 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left - 0x00fe: 0x0698, # ARABIC LETTER JEH - 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> CONTROL CHARACTER - u'\x01' # 0x0001 -> CONTROL CHARACTER - u'\x02' # 0x0002 -> CONTROL CHARACTER - u'\x03' # 0x0003 -> CONTROL CHARACTER - u'\x04' # 0x0004 -> CONTROL CHARACTER - u'\x05' # 0x0005 -> CONTROL CHARACTER - u'\x06' # 0x0006 -> CONTROL CHARACTER - u'\x07' # 0x0007 -> CONTROL CHARACTER - u'\x08' # 0x0008 -> CONTROL CHARACTER - u'\t' # 0x0009 -> CONTROL CHARACTER - u'\n' # 0x000a -> CONTROL CHARACTER - u'\x0b' # 0x000b -> CONTROL CHARACTER - u'\x0c' # 0x000c -> CONTROL CHARACTER - u'\r' # 0x000d -> CONTROL CHARACTER - u'\x0e' # 0x000e -> CONTROL CHARACTER - u'\x0f' # 0x000f -> CONTROL CHARACTER - u'\x10' # 0x0010 -> CONTROL CHARACTER - u'\x11' # 0x0011 -> CONTROL CHARACTER - u'\x12' # 0x0012 -> 
CONTROL CHARACTER - u'\x13' # 0x0013 -> CONTROL CHARACTER - u'\x14' # 0x0014 -> CONTROL CHARACTER - u'\x15' # 0x0015 -> CONTROL CHARACTER - u'\x16' # 0x0016 -> CONTROL CHARACTER - u'\x17' # 0x0017 -> CONTROL CHARACTER - u'\x18' # 0x0018 -> CONTROL CHARACTER - u'\x19' # 0x0019 -> CONTROL CHARACTER - u'\x1a' # 0x001a -> CONTROL CHARACTER - u'\x1b' # 0x001b -> CONTROL CHARACTER - u'\x1c' # 0x001c -> CONTROL CHARACTER - u'\x1d' # 0x001d -> CONTROL CHARACTER - u'\x1e' # 0x001e -> CONTROL CHARACTER - u'\x1f' # 0x001f -> CONTROL CHARACTER - u' ' # 0x0020 -> SPACE, left-right - u'!' # 0x0021 -> EXCLAMATION MARK, left-right - u'"' # 0x0022 -> QUOTATION MARK, left-right - u'#' # 0x0023 -> NUMBER SIGN, left-right - u'$' # 0x0024 -> DOLLAR SIGN, left-right - u'%' # 0x0025 -> PERCENT SIGN, left-right - u'&' # 0x0026 -> AMPERSAND, left-right - u"'" # 0x0027 -> APOSTROPHE, left-right - u'(' # 0x0028 -> LEFT PARENTHESIS, left-right - u')' # 0x0029 -> RIGHT PARENTHESIS, left-right - u'*' # 0x002a -> ASTERISK, left-right - u'+' # 0x002b -> PLUS SIGN, left-right - u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - u'-' # 0x002d -> HYPHEN-MINUS, left-right - u'.' 
# 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - u'/' # 0x002f -> SOLIDUS, left-right - u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE - u':' # 0x003a -> COLON, left-right - u';' # 0x003b -> SEMICOLON, left-right - u'<' # 0x003c -> LESS-THAN SIGN, left-right - u'=' # 0x003d -> EQUALS SIGN, left-right - u'>' # 0x003e -> GREATER-THAN SIGN, left-right - u'?' 
# 0x003f -> QUESTION MARK, left-right - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right - u'\\' # 0x005c -> REVERSE SOLIDUS, left-right - u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right - u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right - u'_' # 0x005f -> LOW LINE, left-right - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - 
u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET, left-right - u'|' # 0x007c -> VERTICAL LINE, left-right - u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> CONTROL CHARACTER - u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left - u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA - u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE - u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left - u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH 
TILDE - u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE - u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0x009b -> DIVISION SIGN, right-left - u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS - u' ' # 0x00a0 -> SPACE, right-left - u'!' # 0x00a1 -> EXCLAMATION MARK, right-left - u'"' # 0x00a2 -> QUOTATION MARK, right-left - u'#' # 0x00a3 -> NUMBER SIGN, right-left - u'$' # 0x00a4 -> DOLLAR SIGN, right-left - u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN - u'&' # 0x00a6 -> AMPERSAND, right-left - u"'" # 0x00a7 -> APOSTROPHE, right-left - u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left - u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left - u'*' # 0x00aa -> ASTERISK, right-left - u'+' # 0x00ab -> PLUS SIGN, right-left - u'\u060c' # 0x00ac -> ARABIC COMMA - u'-' # 0x00ad -> HYPHEN-MINUS, right-left - u'.' 
# 0x00ae -> FULL STOP, right-left - u'/' # 0x00af -> SOLIDUS, right-left - u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override) - u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override) - u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override) - u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override) - u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override) - u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override) - u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override) - u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override) - u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override) - u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override) - u':' # 0x00ba -> COLON, right-left - u'\u061b' # 0x00bb -> ARABIC SEMICOLON - u'<' # 0x00bc -> LESS-THAN SIGN, right-left - u'=' # 0x00bd -> EQUALS SIGN, right-left - u'>' # 0x00be -> GREATER-THAN SIGN, right-left - u'\u061f' # 0x00bf -> ARABIC QUESTION MARK - u'\u274a' # 0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA - u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF - u'\u0628' # 0x00c8 -> ARABIC LETTER BEH - u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0x00ca -> ARABIC LETTER TEH - u'\u062b' # 0x00cb -> ARABIC LETTER THEH - u'\u062c' # 0x00cc -> ARABIC LETTER JEEM - u'\u062d' # 0x00cd -> ARABIC LETTER HAH - u'\u062e' # 0x00ce -> ARABIC LETTER KHAH - u'\u062f' # 0x00cf -> ARABIC LETTER DAL - u'\u0630' # 0x00d0 -> ARABIC LETTER THAL - u'\u0631' # 0x00d1 -> ARABIC LETTER REH - u'\u0632' 
# 0x00d2 -> ARABIC LETTER ZAIN - u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN - u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN - u'\u0635' # 0x00d5 -> ARABIC LETTER SAD - u'\u0636' # 0x00d6 -> ARABIC LETTER DAD - u'\u0637' # 0x00d7 -> ARABIC LETTER TAH - u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH - u'\u0639' # 0x00d9 -> ARABIC LETTER AIN - u'\u063a' # 0x00da -> ARABIC LETTER GHAIN - u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left - u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left - u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left - u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left - u'_' # 0x00df -> LOW LINE, right-left - u'\u0640' # 0x00e0 -> ARABIC TATWEEL - u'\u0641' # 0x00e1 -> ARABIC LETTER FEH - u'\u0642' # 0x00e2 -> ARABIC LETTER QAF - u'\u0643' # 0x00e3 -> ARABIC LETTER KAF - u'\u0644' # 0x00e4 -> ARABIC LETTER LAM - u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM - u'\u0646' # 0x00e6 -> ARABIC LETTER NOON - u'\u0647' # 0x00e7 -> ARABIC LETTER HEH - u'\u0648' # 0x00e8 -> ARABIC LETTER WAW - u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0x00ea -> ARABIC LETTER YEH - u'\u064b' # 0x00eb -> ARABIC FATHATAN - u'\u064c' # 0x00ec -> ARABIC DAMMATAN - u'\u064d' # 0x00ed -> ARABIC KASRATAN - u'\u064e' # 0x00ee -> ARABIC FATHA - u'\u064f' # 0x00ef -> ARABIC DAMMA - u'\u0650' # 0x00f0 -> ARABIC KASRA - u'\u0651' # 0x00f1 -> ARABIC SHADDA - u'\u0652' # 0x00f2 -> ARABIC SUKUN - u'\u067e' # 0x00f3 -> ARABIC LETTER PEH - u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH - u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH - u'\u06d5' # 0x00f6 -> ARABIC LETTER AE - u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH - u'\u06af' # 0x00f8 -> ARABIC LETTER GAF - u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL - u'\u0691' # 0x00fa -> ARABIC LETTER RREH - u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left - u'|' # 0x00fc -> VERTICAL LINE, right-left - u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left - u'\u0698' # 0x00fe -> ARABIC LETTER JEH - u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE -) - -### Encoding Map - 
-encoding_map = { - 0x0000: 0x0000, # CONTROL CHARACTER - 0x0001: 0x0001, # CONTROL CHARACTER - 0x0002: 0x0002, # CONTROL CHARACTER - 0x0003: 0x0003, # CONTROL CHARACTER - 0x0004: 0x0004, # CONTROL CHARACTER - 0x0005: 0x0005, # CONTROL CHARACTER - 0x0006: 0x0006, # CONTROL CHARACTER - 0x0007: 0x0007, # CONTROL CHARACTER - 0x0008: 0x0008, # CONTROL CHARACTER - 0x0009: 0x0009, # CONTROL CHARACTER - 0x000a: 0x000a, # CONTROL CHARACTER - 0x000b: 0x000b, # CONTROL CHARACTER - 0x000c: 0x000c, # CONTROL CHARACTER - 0x000d: 0x000d, # CONTROL CHARACTER - 0x000e: 0x000e, # CONTROL CHARACTER - 0x000f: 0x000f, # CONTROL CHARACTER - 0x0010: 0x0010, # CONTROL CHARACTER - 0x0011: 0x0011, # CONTROL CHARACTER - 0x0012: 0x0012, # CONTROL CHARACTER - 0x0013: 0x0013, # CONTROL CHARACTER - 0x0014: 0x0014, # CONTROL CHARACTER - 0x0015: 0x0015, # CONTROL CHARACTER - 0x0016: 0x0016, # CONTROL CHARACTER - 0x0017: 0x0017, # CONTROL CHARACTER - 0x0018: 0x0018, # CONTROL CHARACTER - 0x0019: 0x0019, # CONTROL CHARACTER - 0x001a: 0x001a, # CONTROL CHARACTER - 0x001b: 0x001b, # CONTROL CHARACTER - 0x001c: 0x001c, # CONTROL CHARACTER - 0x001d: 0x001d, # CONTROL CHARACTER - 0x001e: 0x001e, # CONTROL CHARACTER - 0x001f: 0x001f, # CONTROL CHARACTER - 0x0020: 0x0020, # SPACE, left-right - 0x0020: 0x00a0, # SPACE, right-left - 0x0021: 0x0021, # EXCLAMATION MARK, left-right - 0x0021: 0x00a1, # EXCLAMATION MARK, right-left - 0x0022: 0x0022, # QUOTATION MARK, left-right - 0x0022: 0x00a2, # QUOTATION MARK, right-left - 0x0023: 0x0023, # NUMBER SIGN, left-right - 0x0023: 0x00a3, # NUMBER SIGN, right-left - 0x0024: 0x0024, # DOLLAR SIGN, left-right - 0x0024: 0x00a4, # DOLLAR SIGN, right-left - 0x0025: 0x0025, # PERCENT SIGN, left-right - 0x0026: 0x0026, # AMPERSAND, left-right - 0x0026: 0x00a6, # AMPERSAND, right-left - 0x0027: 0x0027, # APOSTROPHE, left-right - 0x0027: 0x00a7, # APOSTROPHE, right-left - 0x0028: 0x0028, # LEFT PARENTHESIS, left-right - 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left - 
0x0029: 0x0029, # RIGHT PARENTHESIS, left-right - 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left - 0x002a: 0x002a, # ASTERISK, left-right - 0x002a: 0x00aa, # ASTERISK, right-left - 0x002b: 0x002b, # PLUS SIGN, left-right - 0x002b: 0x00ab, # PLUS SIGN, right-left - 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - 0x002d: 0x002d, # HYPHEN-MINUS, left-right - 0x002d: 0x00ad, # HYPHEN-MINUS, right-left - 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - 0x002e: 0x00ae, # FULL STOP, right-left - 0x002f: 0x002f, # SOLIDUS, left-right - 0x002f: 0x00af, # SOLIDUS, right-left - 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE - 0x003a: 0x003a, # COLON, left-right - 0x003a: 0x00ba, # COLON, right-left - 0x003b: 0x003b, # SEMICOLON, left-right - 0x003c: 0x003c, # LESS-THAN SIGN, left-right - 0x003c: 0x00bc, # LESS-THAN SIGN, right-left - 0x003d: 0x003d, # EQUALS SIGN, left-right - 0x003d: 0x00bd, # 
EQUALS SIGN, right-left - 0x003e: 0x003e, # GREATER-THAN SIGN, left-right - 0x003e: 0x00be, # GREATER-THAN SIGN, right-left - 0x003f: 0x003f, # QUESTION MARK, left-right - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right - 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left - 0x005c: 0x005c, # REVERSE SOLIDUS, left-right - 0x005c: 0x00dc, # REVERSE SOLIDUS, right-left - 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right - 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left - 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right - 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left - 0x005f: 0x005f, # LOW LINE, left-right - 0x005f: 0x00df, # LOW LINE, right-left - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL 
LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right - 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left - 0x007c: 0x007c, # VERTICAL LINE, left-right - 0x007c: 0x00fc, # VERTICAL LINE, right-left - 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right - 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # CONTROL CHARACTER - 0x00a0: 0x0081, # NO-BREAK SPACE, right-left - 0x00ab: 0x008c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x008a, # LATIN SMALL 
LETTER A WITH DIAERESIS - 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x009b, # DIVISION SIGN, right-left - 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x060c: 0x00ac, # ARABIC COMMA - 0x061b: 0x00bb, # ARABIC SEMICOLON - 0x061f: 0x00bf, # ARABIC QUESTION MARK - 0x0621: 0x00c1, # ARABIC LETTER HAMZA - 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0x00c7, # ARABIC LETTER ALEF - 0x0628: 0x00c8, # ARABIC LETTER BEH - 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA - 0x062a: 0x00ca, # ARABIC LETTER TEH - 0x062b: 0x00cb, # ARABIC LETTER THEH - 0x062c: 0x00cc, # ARABIC LETTER JEEM - 0x062d: 0x00cd, # ARABIC LETTER HAH - 0x062e: 0x00ce, # ARABIC LETTER KHAH - 0x062f: 0x00cf, # ARABIC LETTER DAL - 0x0630: 0x00d0, # ARABIC LETTER THAL - 0x0631: 0x00d1, # ARABIC LETTER REH - 0x0632: 0x00d2, # ARABIC LETTER ZAIN - 0x0633: 0x00d3, # ARABIC LETTER SEEN - 0x0634: 0x00d4, # ARABIC LETTER SHEEN - 0x0635: 0x00d5, # ARABIC LETTER SAD - 0x0636: 0x00d6, # ARABIC LETTER DAD - 
0x0637: 0x00d7, # ARABIC LETTER TAH - 0x0638: 0x00d8, # ARABIC LETTER ZAH - 0x0639: 0x00d9, # ARABIC LETTER AIN - 0x063a: 0x00da, # ARABIC LETTER GHAIN - 0x0640: 0x00e0, # ARABIC TATWEEL - 0x0641: 0x00e1, # ARABIC LETTER FEH - 0x0642: 0x00e2, # ARABIC LETTER QAF - 0x0643: 0x00e3, # ARABIC LETTER KAF - 0x0644: 0x00e4, # ARABIC LETTER LAM - 0x0645: 0x00e5, # ARABIC LETTER MEEM - 0x0646: 0x00e6, # ARABIC LETTER NOON - 0x0647: 0x00e7, # ARABIC LETTER HEH - 0x0648: 0x00e8, # ARABIC LETTER WAW - 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA - 0x064a: 0x00ea, # ARABIC LETTER YEH - 0x064b: 0x00eb, # ARABIC FATHATAN - 0x064c: 0x00ec, # ARABIC DAMMATAN - 0x064d: 0x00ed, # ARABIC KASRATAN - 0x064e: 0x00ee, # ARABIC FATHA - 0x064f: 0x00ef, # ARABIC DAMMA - 0x0650: 0x00f0, # ARABIC KASRA - 0x0651: 0x00f1, # ARABIC SHADDA - 0x0652: 0x00f2, # ARABIC SUKUN - 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x066a: 0x00a5, # ARABIC PERCENT SIGN - 0x0679: 0x00f4, # ARABIC LETTER TTEH - 0x067e: 0x00f3, # ARABIC LETTER PEH - 0x0686: 0x00f5, # ARABIC LETTER TCHEH - 0x0688: 0x00f9, # ARABIC LETTER DDAL - 0x0691: 0x00fa, # ARABIC LETTER RREH - 0x0698: 0x00fe, # ARABIC LETTER JEH - 0x06a4: 0x00f7, # ARABIC LETTER VEH - 0x06af: 0x00f8, # ARABIC LETTER GAF - 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA - 0x06d2: 0x00ff, # ARABIC 
LETTER YEH BARREE - 0x06d5: 0x00f6, # ARABIC LETTER AE - 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left - 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left -} diff --git a/python/Lib/encodings/mac_centeuro.py b/python/Lib/encodings/mac_centeuro.py deleted file mode 100755 index 483c8212ac..0000000000 --- a/python/Lib/encodings/mac_centeuro.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_centeuro generated from 'MAPPINGS/VENDORS/APPLE/CENTEURO.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-centeuro', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER 
- u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0104' # 0x84 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK - u'\u010c' # 0x89 -> LATIN CAPITAL LETTER C WITH CARON - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON - u'\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE - u'\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE - u'\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\u010f' # 0x93 -> LATIN SMALL LETTER D WITH CARON - u'\u0112' # 0x94 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0113' # 0x95 -> LATIN SMALL LETTER E WITH MACRON - u'\u0116' # 0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\u0117' # 0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH 
TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON - u'\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\u0119' # 0xAB -> LATIN SMALL LETTER E WITH OGONEK - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK - u'\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE - u'\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA - u'\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON - u'\u013e' # 0xBC -> LATIN SMALL LETTER L WITH CARON - u'\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE - u'\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON - u'\u2206' # 
0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON - u'\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON - u'\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE - u'\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON - u'\u0156' # 0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA - u'\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0161' # 0xE4 -> LATIN SMALL LETTER S WITH CARON - u'\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON - u'\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON - 
u'\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON - u'\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK - u'\xdd' # 0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA - u'\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0122' # 0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mac_croatian.py b/python/Lib/encodings/mac_croatian.py deleted file mode 100755 index f57f7b4b33..0000000000 --- a/python/Lib/encodings/mac_croatian.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_croatian generated from 'MAPPINGS/VENDORS/APPLE/CROATIAN.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-croatian', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL 
CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 
0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u2206' # 0xB4 -> INCREMENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> 
LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\uf8ff' # 0xD8 -> Apple logo - u'\xa9' # 0xD9 -> COPYRIGHT SIGN - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\xc6' # 0xDE -> LATIN CAPITAL LETTER AE - u'\xbb' # 0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2013' # 0xE0 -> EN DASH - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 
u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u03c0' # 0xF9 -> GREEK SMALL LETTER PI - u'\xcb' # 0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\xca' # 0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xe6' # 0xFE -> LATIN SMALL LETTER AE - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mac_cyrillic.py b/python/Lib/encodings/mac_cyrillic.py deleted file mode 100755 index 63324a14b8..0000000000 --- a/python/Lib/encodings/mac_cyrillic.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_cyrillic generated from 'MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-cyrillic', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - 
u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\u0410' # 0x80 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0x81 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0x82 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0x83 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0x84 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0x85 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0x86 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0x87 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0x88 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0x89 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0x8A -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0x8B -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0x8C -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0x8D -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0x8E -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0x8F -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0x90 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0x91 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0x92 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0x93 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0x94 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0x95 -> CYRILLIC CAPITAL 
LETTER HA - u'\u0426' # 0x96 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0x97 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0x98 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0x99 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0x9B -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0x9D -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0x9E -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0x9F -> CYRILLIC CAPITAL LETTER YA - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\u0490' # 0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\u0406' # 0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\u0402' # 0xAB -> CYRILLIC CAPITAL LETTER DJE - u'\u0452' # 0xAC -> CYRILLIC SMALL LETTER DJE - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0403' # 0xAE -> CYRILLIC CAPITAL LETTER GJE - u'\u0453' # 0xAF -> CYRILLIC SMALL LETTER GJE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u0456' # 0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u0491' # 0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\u0408' # 0xB7 -> CYRILLIC CAPITAL LETTER JE - u'\u0404' # 0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0454' # 0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0407' # 0xBA -> CYRILLIC CAPITAL LETTER YI - u'\u0457' # 0xBB -> CYRILLIC SMALL LETTER YI - u'\u0409' # 0xBC -> CYRILLIC CAPITAL LETTER LJE - u'\u0459' # 0xBD -> CYRILLIC SMALL LETTER LJE - u'\u040a' # 0xBE -> CYRILLIC CAPITAL LETTER NJE - u'\u045a' # 0xBF -> CYRILLIC SMALL LETTER NJE - u'\u0458' # 0xC0 -> CYRILLIC SMALL LETTER 
JE - u'\u0405' # 0xC1 -> CYRILLIC CAPITAL LETTER DZE - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u040b' # 0xCB -> CYRILLIC CAPITAL LETTER TSHE - u'\u045b' # 0xCC -> CYRILLIC SMALL LETTER TSHE - u'\u040c' # 0xCD -> CYRILLIC CAPITAL LETTER KJE - u'\u045c' # 0xCE -> CYRILLIC SMALL LETTER KJE - u'\u0455' # 0xCF -> CYRILLIC SMALL LETTER DZE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u201e' # 0xD7 -> DOUBLE LOW-9 QUOTATION MARK - u'\u040e' # 0xD8 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0xD9 -> CYRILLIC SMALL LETTER SHORT U - u'\u040f' # 0xDA -> CYRILLIC CAPITAL LETTER DZHE - u'\u045f' # 0xDB -> CYRILLIC SMALL LETTER DZHE - u'\u2116' # 0xDC -> NUMERO SIGN - u'\u0401' # 0xDD -> CYRILLIC CAPITAL LETTER IO - u'\u0451' # 0xDE -> CYRILLIC SMALL LETTER IO - u'\u044f' # 0xDF -> CYRILLIC SMALL LETTER YA - u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xEC 
-> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU - u'\u20ac' # 0xFF -> EURO SIGN -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mac_farsi.py b/python/Lib/encodings/mac_farsi.py deleted file mode 100755 index 9dbd76a232..0000000000 --- a/python/Lib/encodings/mac_farsi.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_farsi generated from 'MAPPINGS/VENDORS/APPLE/FARSI.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-farsi', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL 
CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE, left-right - u'!' # 0x21 -> EXCLAMATION MARK, left-right - u'"' # 0x22 -> QUOTATION MARK, left-right - u'#' # 0x23 -> NUMBER SIGN, left-right - u'$' # 0x24 -> DOLLAR SIGN, left-right - u'%' # 0x25 -> PERCENT SIGN, left-right - u'&' # 0x26 -> AMPERSAND, left-right - u"'" # 0x27 -> APOSTROPHE, left-right - u'(' # 0x28 -> LEFT PARENTHESIS, left-right - u')' # 0x29 -> RIGHT PARENTHESIS, left-right - u'*' # 0x2A -> ASTERISK, left-right - u'+' # 0x2B -> PLUS SIGN, left-right - u',' # 0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - u'-' # 0x2D -> HYPHEN-MINUS, left-right - u'.' # 0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - u'/' # 0x2F -> SOLIDUS, left-right - u'0' # 0x30 -> DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE - u'2' # 0x32 -> DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO - u'3' # 0x33 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT; in Arabic-script context, 
displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE - u':' # 0x3A -> COLON, left-right - u';' # 0x3B -> SEMICOLON, left-right - u'<' # 0x3C -> LESS-THAN SIGN, left-right - u'=' # 0x3D -> EQUALS SIGN, left-right - u'>' # 0x3E -> GREATER-THAN SIGN, left-right - u'?' # 0x3F -> QUESTION MARK, left-right - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET, left-right - u'\\' # 0x5C -> REVERSE SOLIDUS, left-right - u']' # 0x5D -> RIGHT SQUARE BRACKET, left-right - u'^' # 0x5E -> CIRCUMFLEX ACCENT, left-right - u'_' # 0x5F -> LOW LINE, left-right - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL 
LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET, left-right - u'|' # 0x7C -> VERTICAL LINE, left-right - u'}' # 0x7D -> RIGHT CURLY BRACKET, left-right - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xa0' # 0x81 -> NO-BREAK SPACE, right-left - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u06ba' # 0x8B -> ARABIC LETTER NOON GHUNNA - u'\xab' # 0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\u2026' # 0x93 
-> HORIZONTAL ELLIPSIS, right-left - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xbb' # 0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0x9B -> DIVISION SIGN, right-left - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u' ' # 0xA0 -> SPACE, right-left - u'!' # 0xA1 -> EXCLAMATION MARK, right-left - u'"' # 0xA2 -> QUOTATION MARK, right-left - u'#' # 0xA3 -> NUMBER SIGN, right-left - u'$' # 0xA4 -> DOLLAR SIGN, right-left - u'\u066a' # 0xA5 -> ARABIC PERCENT SIGN - u'&' # 0xA6 -> AMPERSAND, right-left - u"'" # 0xA7 -> APOSTROPHE, right-left - u'(' # 0xA8 -> LEFT PARENTHESIS, right-left - u')' # 0xA9 -> RIGHT PARENTHESIS, right-left - u'*' # 0xAA -> ASTERISK, right-left - u'+' # 0xAB -> PLUS SIGN, right-left - u'\u060c' # 0xAC -> ARABIC COMMA - u'-' # 0xAD -> HYPHEN-MINUS, right-left - u'.' 
# 0xAE -> FULL STOP, right-left - u'/' # 0xAF -> SOLIDUS, right-left - u'\u06f0' # 0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) - u'\u06f1' # 0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) - u'\u06f2' # 0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) - u'\u06f3' # 0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) - u'\u06f4' # 0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) - u'\u06f5' # 0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) - u'\u06f6' # 0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) - u'\u06f7' # 0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) - u'\u06f8' # 0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) - u'\u06f9' # 0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) - u':' # 0xBA -> COLON, right-left - u'\u061b' # 0xBB -> ARABIC SEMICOLON - u'<' # 0xBC -> LESS-THAN SIGN, right-left - u'=' # 0xBD -> EQUALS SIGN, right-left - u'>' # 0xBE -> GREATER-THAN SIGN, right-left - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\u274a' # 0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC 
LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\u0637' # 0xD7 -> ARABIC LETTER TAH - u'\u0638' # 0xD8 -> ARABIC LETTER ZAH - u'\u0639' # 0xD9 -> ARABIC LETTER AIN - u'\u063a' # 0xDA -> ARABIC LETTER GHAIN - u'[' # 0xDB -> LEFT SQUARE BRACKET, right-left - u'\\' # 0xDC -> REVERSE SOLIDUS, right-left - u']' # 0xDD -> RIGHT SQUARE BRACKET, right-left - u'^' # 0xDE -> CIRCUMFLEX ACCENT, right-left - u'_' # 0xDF -> LOW LINE, right-left - u'\u0640' # 0xE0 -> ARABIC TATWEEL - u'\u0641' # 0xE1 -> ARABIC LETTER FEH - u'\u0642' # 0xE2 -> ARABIC LETTER QAF - u'\u0643' # 0xE3 -> ARABIC LETTER KAF - u'\u0644' # 0xE4 -> ARABIC LETTER LAM - u'\u0645' # 0xE5 -> ARABIC LETTER MEEM - u'\u0646' # 0xE6 -> ARABIC LETTER NOON - u'\u0647' # 0xE7 -> ARABIC LETTER HEH - u'\u0648' # 0xE8 -> ARABIC LETTER WAW - u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEA -> ARABIC LETTER YEH - u'\u064b' # 0xEB -> ARABIC FATHATAN - u'\u064c' # 0xEC -> ARABIC DAMMATAN - u'\u064d' # 0xED -> ARABIC KASRATAN - u'\u064e' # 0xEE -> ARABIC FATHA - u'\u064f' # 0xEF -> ARABIC DAMMA - u'\u0650' # 0xF0 -> ARABIC KASRA - u'\u0651' # 0xF1 -> ARABIC SHADDA - u'\u0652' # 0xF2 -> ARABIC SUKUN - u'\u067e' # 0xF3 -> ARABIC LETTER PEH - u'\u0679' # 0xF4 -> ARABIC LETTER TTEH - u'\u0686' # 0xF5 -> ARABIC LETTER TCHEH - u'\u06d5' # 0xF6 -> ARABIC LETTER AE - u'\u06a4' # 0xF7 -> ARABIC LETTER VEH - u'\u06af' # 0xF8 -> ARABIC LETTER GAF - u'\u0688' # 0xF9 -> ARABIC LETTER DDAL - u'\u0691' # 0xFA -> ARABIC LETTER RREH - u'{' # 0xFB -> LEFT CURLY BRACKET, right-left - u'|' # 0xFC -> VERTICAL LINE, right-left - u'}' # 0xFD -> RIGHT CURLY BRACKET, right-left - u'\u0698' # 0xFE -> ARABIC LETTER JEH - u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git 
a/python/Lib/encodings/mac_greek.py b/python/Lib/encodings/mac_greek.py deleted file mode 100755 index 68f4fff0db..0000000000 --- a/python/Lib/encodings/mac_greek.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_greek generated from 'MAPPINGS/VENDORS/APPLE/GREEK.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-greek', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> 
CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xb9' # 0x81 -> SUPERSCRIPT ONE - u'\xb2' # 0x82 -> SUPERSCRIPT TWO - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xb3' # 0x84 -> SUPERSCRIPT THREE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0385' # 0x87 -> GREEK DIALYTIKA TONOS - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0384' # 0x8B -> GREEK TONOS - u'\xa8' # 0x8C -> DIAERESIS - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xa3' # 0x92 -> POUND SIGN - u'\u2122' # 0x93 -> TRADE MARK SIGN - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u2022' # 0x96 -> BULLET - u'\xbd' # 0x97 -> VULGAR FRACTION ONE HALF - u'\u2030' # 0x98 -> PER MILLE SIGN - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xa6' # 0x9B -> BROKEN BAR - u'\u20ac' # 0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH 
DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\u0393' # 0xA1 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xA2 -> GREEK CAPITAL LETTER DELTA - u'\u0398' # 0xA3 -> GREEK CAPITAL LETTER THETA - u'\u039b' # 0xA4 -> GREEK CAPITAL LETTER LAMDA - u'\u039e' # 0xA5 -> GREEK CAPITAL LETTER XI - u'\u03a0' # 0xA6 -> GREEK CAPITAL LETTER PI - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u03a3' # 0xAA -> GREEK CAPITAL LETTER SIGMA - u'\u03aa' # 0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\xa7' # 0xAC -> SECTION SIGN - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xb0' # 0xAE -> DEGREE SIGN - u'\xb7' # 0xAF -> MIDDLE DOT - u'\u0391' # 0xB0 -> GREEK CAPITAL LETTER ALPHA - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\u0392' # 0xB5 -> GREEK CAPITAL LETTER BETA - u'\u0395' # 0xB6 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xB7 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xB8 -> GREEK CAPITAL LETTER ETA - u'\u0399' # 0xB9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xBA -> GREEK CAPITAL LETTER KAPPA - u'\u039c' # 0xBB -> GREEK CAPITAL LETTER MU - u'\u03a6' # 0xBC -> GREEK CAPITAL LETTER PHI - u'\u03ab' # 0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03a8' # 0xBE -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xBF -> GREEK CAPITAL LETTER OMEGA - u'\u03ac' # 0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u039d' # 0xC1 -> GREEK CAPITAL LETTER NU - u'\xac' # 0xC2 -> NOT SIGN - u'\u039f' # 0xC3 -> GREEK CAPITAL LETTER OMICRON - u'\u03a1' # 0xC4 -> GREEK CAPITAL LETTER RHO - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u03a4' # 0xC6 -> GREEK CAPITAL LETTER TAU - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u03a5' # 0xCB -> GREEK CAPITAL LETTER 
UPSILON - u'\u03a7' # 0xCC -> GREEK CAPITAL LETTER CHI - u'\u0386' # 0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2015' # 0xD1 -> HORIZONTAL BAR - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u0389' # 0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u03ad' # 0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDC -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDD -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u038f' # 0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u03cd' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03c8' # 0xE3 -> GREEK SMALL LETTER PSI - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03c6' # 0xE6 -> GREEK SMALL LETTER PHI - u'\u03b3' # 0xE7 -> GREEK SMALL LETTER GAMMA - u'\u03b7' # 0xE8 -> GREEK SMALL LETTER ETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03be' # 0xEA -> GREEK SMALL LETTER XI - u'\u03ba' # 0xEB -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEC -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xED -> GREEK SMALL LETTER MU - u'\u03bd' # 0xEE -> GREEK SMALL LETTER NU - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03ce' # 0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c1' # 0xF2 -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER 
SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03b8' # 0xF5 -> GREEK SMALL LETTER THETA - u'\u03c9' # 0xF6 -> GREEK SMALL LETTER OMEGA - u'\u03c2' # 0xF7 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c7' # 0xF8 -> GREEK SMALL LETTER CHI - u'\u03c5' # 0xF9 -> GREEK SMALL LETTER UPSILON - u'\u03b6' # 0xFA -> GREEK SMALL LETTER ZETA - u'\u03ca' # 0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u0390' # 0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mac_iceland.py b/python/Lib/encodings/mac_iceland.py deleted file mode 100755 index c24add2ad0..0000000000 --- a/python/Lib/encodings/mac_iceland.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_iceland generated from 'MAPPINGS/VENDORS/APPLE/ICELAND.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-iceland', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL 
CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 
0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xdd' # 0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> 
LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\xd0' # 0xDC -> LATIN CAPITAL LETTER ETH - u'\xf0' # 0xDD -> LATIN SMALL LETTER ETH - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xfe' # 0xDF -> LATIN SMALL LETTER THORN - u'\xfd' # 0xE0 -> LATIN SMALL LETTER Y WITH ACUTE - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL 
LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mac_latin2.py b/python/Lib/encodings/mac_latin2.py deleted file mode 100755 index e322be236c..0000000000 --- a/python/Lib/encodings/mac_latin2.py +++ /dev/null @@ -1,183 +0,0 @@ -""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-latin2', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c4, # 
LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED 
SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE 
QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00f9: 0x00fd, # LATIN 
SMALL LETTER Y WITH ACUTE - 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00ff: 0x02c7, # CARON -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map) diff --git a/python/Lib/encodings/mac_roman.py b/python/Lib/encodings/mac_roman.py deleted file mode 100755 index 62605ec634..0000000000 --- a/python/Lib/encodings/mac_roman.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_roman generated from 'MAPPINGS/VENDORS/APPLE/ROMAN.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-roman', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 
0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 
0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A 
WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufb01' # 0xDE -> LATIN SMALL LIGATURE FI - u'\ufb02' # 0xDF -> LATIN SMALL LIGATURE FL - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN 
SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mac_romanian.py b/python/Lib/encodings/mac_romanian.py deleted file mode 100755 index 5bd5ae8625..0000000000 --- a/python/Lib/encodings/mac_romanian.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_romanian generated from 'MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-romanian', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - 
u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 
0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0102' # 0xAE -> LATIN CAPITAL LETTER A WITH BREVE - u'\u0218' # 0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\u0103' # 0xBE -> LATIN SMALL LETTER A WITH BREVE - u'\u0219' # 0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - 
u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u021b' # 0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 
-> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mac_turkish.py b/python/Lib/encodings/mac_turkish.py deleted file mode 100755 index 0787f4990b..0000000000 --- a/python/Lib/encodings/mac_turkish.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_turkish generated from 'MAPPINGS/VENDORS/APPLE/TURKISH.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-turkish', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - 
-decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' 
# 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER 
F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN 
SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 
0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u011e' # 0xDA -> LATIN CAPITAL LETTER G WITH BREVE - u'\u011f' # 0xDB -> LATIN SMALL LETTER G WITH BREVE - u'\u0130' # 0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u0131' # 0xDD -> LATIN SMALL LETTER DOTLESS I - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u015f' # 0xDF -> LATIN SMALL LETTER S WITH CEDILLA - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH 
CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\uf8a0' # 0xF5 -> undefined1 - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/mbcs.py b/python/Lib/encodings/mbcs.py deleted file mode 100755 index baf46cbd48..0000000000 --- a/python/Lib/encodings/mbcs.py +++ /dev/null @@ -1,47 +0,0 @@ -""" Python 'mbcs' Codec for Windows - - -Cloned by Mark Hammond (mhammond@skippinet.com.au) from ascii.py, -which was written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -# Import them explicitly to cause an ImportError -# on non-Windows systems -from codecs import mbcs_encode, mbcs_decode -# for IncrementalDecoder, IncrementalEncoder, ... 
-import codecs - -### Codec APIs - -encode = mbcs_encode - -def decode(input, errors='strict'): - return mbcs_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return mbcs_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = mbcs_decode - -class StreamWriter(codecs.StreamWriter): - encode = mbcs_encode - -class StreamReader(codecs.StreamReader): - decode = mbcs_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mbcs', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/palmos.py b/python/Lib/encodings/palmos.py deleted file mode 100755 index 4b77e2ba91..0000000000 --- a/python/Lib/encodings/palmos.py +++ /dev/null @@ -1,83 +0,0 @@ -""" Python Character Mapping Codec for PalmOS 3.5. - -Written by Sjoerd Mullender (sjoerd@acm.org); based on iso8859_15.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='palmos', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) - -# The PalmOS character set is mostly iso-8859-1 with some differences. 
-decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: 0x2666, # BLACK DIAMOND SUIT - 0x008e: 0x2663, # BLACK CLUB SUIT - 0x008f: 0x2665, # BLACK HEART SUIT - 0x0090: 0x2660, # BLACK SPADE SUIT - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map) diff --git a/python/Lib/encodings/ptcp154.py b/python/Lib/encodings/ptcp154.py deleted file mode 100755 index aef897538f..0000000000 --- a/python/Lib/encodings/ptcp154.py +++ /dev/null @@ -1,175 +0,0 @@ -""" Python Character Mapping Codec generated from 'PTCP154.txt' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='ptcp154', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0496, # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER - 0x0081: 0x0492, # CYRILLIC CAPITAL LETTER GHE WITH STROKE - 0x0082: 0x04ee, # CYRILLIC CAPITAL LETTER U WITH MACRON - 0x0083: 0x0493, # CYRILLIC SMALL LETTER GHE WITH STROKE - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x04b6, # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER - 0x0087: 0x04ae, # CYRILLIC CAPITAL LETTER STRAIGHT U - 0x0088: 0x04b2, # CYRILLIC CAPITAL LETTER HA WITH DESCENDER - 0x0089: 0x04af, # CYRILLIC SMALL LETTER STRAIGHT U - 0x008a: 0x04a0, # CYRILLIC CAPITAL LETTER BASHKIR KA - 0x008b: 0x04e2, # CYRILLIC CAPITAL LETTER I WITH MACRON - 0x008c: 0x04a2, # CYRILLIC CAPITAL LETTER EN WITH DESCENDER - 0x008d: 0x049a, # CYRILLIC CAPITAL LETTER KA WITH DESCENDER - 0x008e: 0x04ba, # CYRILLIC CAPITAL LETTER SHHA - 0x008f: 0x04b8, # CYRILLIC CAPITAL LETTER CHE 
WITH VERTICAL STROKE - 0x0090: 0x0497, # CYRILLIC SMALL LETTER ZHE WITH DESCENDER - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x04b3, # CYRILLIC SMALL LETTER HA WITH DESCENDER - 0x0099: 0x04b7, # CYRILLIC SMALL LETTER CHE WITH DESCENDER - 0x009a: 0x04a1, # CYRILLIC SMALL LETTER BASHKIR KA - 0x009b: 0x04e3, # CYRILLIC SMALL LETTER I WITH MACRON - 0x009c: 0x04a3, # CYRILLIC SMALL LETTER EN WITH DESCENDER - 0x009d: 0x049b, # CYRILLIC SMALL LETTER KA WITH DESCENDER - 0x009e: 0x04bb, # CYRILLIC SMALL LETTER SHHA - 0x009f: 0x04b9, # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE - 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U (Byelorussian) - 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U (Byelorussian) - 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a4: 0x04e8, # CYRILLIC CAPITAL LETTER BARRED O - 0x00a5: 0x0498, # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER - 0x00a6: 0x04b0, # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE - 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00aa: 0x04d8, # CYRILLIC CAPITAL LETTER SCHWA - 0x00ad: 0x04ef, # CYRILLIC SMALL LETTER U WITH MACRON - 0x00af: 0x049c, # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE - 0x00b1: 0x04b1, # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE - 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b4: 0x0499, # CYRILLIC SMALL LETTER ZE WITH DESCENDER - 0x00b5: 0x04e9, # CYRILLIC SMALL LETTER BARRED O - 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00b9: 0x2116, # NUMERO SIGN - 0x00ba: 0x04d9, # CYRILLIC SMALL LETTER SCHWA - 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00bd: 0x04aa, # CYRILLIC CAPITAL LETTER ES WITH DESCENDER - 0x00be: 0x04ab, # CYRILLIC SMALL LETTER ES WITH 
DESCENDER - 0x00bf: 0x049d, # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE - 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL 
LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map) diff --git a/python/Lib/encodings/punycode.py b/python/Lib/encodings/punycode.py deleted file mode 100755 index d97200fd35..0000000000 --- a/python/Lib/encodings/punycode.py +++ /dev/null @@ -1,238 +0,0 @@ -# -*- coding: iso-8859-1 -*- -""" Codec for the Punicode encoding, as specified in RFC 3492 - -Written by Martin v. Löwis. 
-""" - -import codecs - -##################### Encoding ##################################### - -def segregate(str): - """3.1 Basic code point segregation""" - base = [] - extended = {} - for c in str: - if ord(c) < 128: - base.append(c) - else: - extended[c] = 1 - extended = extended.keys() - extended.sort() - return "".join(base).encode("ascii"),extended - -def selective_len(str, max): - """Return the length of str, considering only characters below max.""" - res = 0 - for c in str: - if ord(c) < max: - res += 1 - return res - -def selective_find(str, char, index, pos): - """Return a pair (index, pos), indicating the next occurrence of - char in str. index is the position of the character considering - only ordinals up to and including char, and pos is the position in - the full string. index/pos is the starting position in the full - string.""" - - l = len(str) - while 1: - pos += 1 - if pos == l: - return (-1, -1) - c = str[pos] - if c == char: - return index+1, pos - elif c < char: - index += 1 - -def insertion_unsort(str, extended): - """3.2 Insertion unsort coding""" - oldchar = 0x80 - result = [] - oldindex = -1 - for c in extended: - index = pos = -1 - char = ord(c) - curlen = selective_len(str, char) - delta = (curlen+1) * (char - oldchar) - while 1: - index,pos = selective_find(str,c,index,pos) - if index == -1: - break - delta += index - oldindex - result.append(delta-1) - oldindex = index - delta = 0 - oldchar = char - - return result - -def T(j, bias): - # Punycode parameters: tmin = 1, tmax = 26, base = 36 - res = 36 * (j + 1) - bias - if res < 1: return 1 - if res > 26: return 26 - return res - -digits = "abcdefghijklmnopqrstuvwxyz0123456789" -def generate_generalized_integer(N, bias): - """3.3 Generalized variable-length integers""" - result = [] - j = 0 - while 1: - t = T(j, bias) - if N < t: - result.append(digits[N]) - return result - result.append(digits[t + ((N - t) % (36 - t))]) - N = (N - t) // (36 - t) - j += 1 - -def adapt(delta, first, 
numchars): - if first: - delta //= 700 - else: - delta //= 2 - delta += delta // numchars - # ((base - tmin) * tmax) // 2 == 455 - divisions = 0 - while delta > 455: - delta = delta // 35 # base - tmin - divisions += 36 - bias = divisions + (36 * delta // (delta + 38)) - return bias - - -def generate_integers(baselen, deltas): - """3.4 Bias adaptation""" - # Punycode parameters: initial bias = 72, damp = 700, skew = 38 - result = [] - bias = 72 - for points, delta in enumerate(deltas): - s = generate_generalized_integer(delta, bias) - result.extend(s) - bias = adapt(delta, points==0, baselen+points+1) - return "".join(result) - -def punycode_encode(text): - base, extended = segregate(text) - base = base.encode("ascii") - deltas = insertion_unsort(text, extended) - extended = generate_integers(len(base), deltas) - if base: - return base + "-" + extended - return extended - -##################### Decoding ##################################### - -def decode_generalized_number(extended, extpos, bias, errors): - """3.3 Generalized variable-length integers""" - result = 0 - w = 1 - j = 0 - while 1: - try: - char = ord(extended[extpos]) - except IndexError: - if errors == "strict": - raise UnicodeError, "incomplete punicode string" - return extpos + 1, None - extpos += 1 - if 0x41 <= char <= 0x5A: # A-Z - digit = char - 0x41 - elif 0x30 <= char <= 0x39: - digit = char - 22 # 0x30-26 - elif errors == "strict": - raise UnicodeError("Invalid extended code point '%s'" - % extended[extpos]) - else: - return extpos, None - t = T(j, bias) - result += digit * w - if digit < t: - return extpos, result - w = w * (36 - t) - j += 1 - - -def insertion_sort(base, extended, errors): - """3.2 Insertion unsort coding""" - char = 0x80 - pos = -1 - bias = 72 - extpos = 0 - while extpos < len(extended): - newpos, delta = decode_generalized_number(extended, extpos, - bias, errors) - if delta is None: - # There was an error in decoding. We can't continue because - # synchronization is lost. 
- return base - pos += delta+1 - char += pos // (len(base) + 1) - if char > 0x10FFFF: - if errors == "strict": - raise UnicodeError, ("Invalid character U+%x" % char) - char = ord('?') - pos = pos % (len(base) + 1) - base = base[:pos] + unichr(char) + base[pos:] - bias = adapt(delta, (extpos == 0), len(base)) - extpos = newpos - return base - -def punycode_decode(text, errors): - pos = text.rfind("-") - if pos == -1: - base = "" - extended = text - else: - base = text[:pos] - extended = text[pos+1:] - base = unicode(base, "ascii", errors) - extended = extended.upper() - return insertion_sort(base, extended, errors) - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - res = punycode_encode(input) - return res, len(input) - - def decode(self,input,errors='strict'): - if errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError, "Unsupported error handling "+errors - res = punycode_decode(input, errors) - return res, len(input) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return punycode_encode(input) - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - if self.errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError, "Unsupported error handling "+self.errors - return punycode_decode(input, self.errors) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='punycode', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/quopri_codec.py b/python/Lib/encodings/quopri_codec.py deleted file mode 100755 index e4965da86c..0000000000 --- a/python/Lib/encodings/quopri_codec.py +++ /dev/null @@ -1,76 +0,0 
@@ -"""Codec for quoted-printable encoding. - -Like base64 and rot13, this returns Python strings, not Unicode. -""" - -import codecs, quopri -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -def quopri_encode(input, errors='strict'): - """Encode the input, returning a tuple (output object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - # using str() because of cStringIO's Unicode undesired Unicode behavior. - f = StringIO(str(input)) - g = StringIO() - quopri.encode(f, g, quotetabs=True) - output = g.getvalue() - return (output, len(input)) - -def quopri_decode(input, errors='strict'): - """Decode the input, returning a tuple (output object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. 
- - """ - assert errors == 'strict' - f = StringIO(str(input)) - g = StringIO() - quopri.decode(f, g) - output = g.getvalue() - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input,errors='strict'): - return quopri_encode(input,errors) - def decode(self, input,errors='strict'): - return quopri_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return quopri_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return quopri_decode(input, self.errors)[0] - -class StreamWriter(Codec, codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -# encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='quopri', - encode=quopri_encode, - decode=quopri_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - ) diff --git a/python/Lib/encodings/raw_unicode_escape.py b/python/Lib/encodings/raw_unicode_escape.py deleted file mode 100755 index 2b919b40d3..0000000000 --- a/python/Lib/encodings/raw_unicode_escape.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Python 'raw-unicode-escape' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. 
- encode = codecs.raw_unicode_escape_encode - decode = codecs.raw_unicode_escape_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.raw_unicode_escape_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.raw_unicode_escape_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='raw-unicode-escape', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/rot_13.py b/python/Lib/encodings/rot_13.py deleted file mode 100755 index 4eaf4338f9..0000000000 --- a/python/Lib/encodings/rot_13.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -""" Python Character Mapping Codec for ROT13. - - See http://ucsub.colorado.edu/~kominek/rot13/ for details. - - Written by Marc-Andre Lemburg (mal@lemburg.com). 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='rot-13', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0041: 0x004e, - 0x0042: 0x004f, - 0x0043: 0x0050, - 0x0044: 0x0051, - 0x0045: 0x0052, - 0x0046: 0x0053, - 0x0047: 0x0054, - 0x0048: 0x0055, - 0x0049: 0x0056, - 0x004a: 0x0057, - 0x004b: 0x0058, - 0x004c: 0x0059, - 0x004d: 0x005a, - 0x004e: 0x0041, - 0x004f: 0x0042, - 0x0050: 0x0043, - 0x0051: 0x0044, - 0x0052: 0x0045, - 0x0053: 0x0046, - 0x0054: 0x0047, - 0x0055: 0x0048, - 0x0056: 0x0049, - 0x0057: 0x004a, - 0x0058: 0x004b, - 0x0059: 0x004c, - 0x005a: 0x004d, - 0x0061: 0x006e, - 0x0062: 0x006f, - 0x0063: 0x0070, - 0x0064: 0x0071, - 0x0065: 0x0072, - 0x0066: 0x0073, - 0x0067: 0x0074, - 0x0068: 0x0075, - 0x0069: 0x0076, - 0x006a: 0x0077, - 0x006b: 0x0078, - 0x006c: 0x0079, - 0x006d: 0x007a, - 0x006e: 0x0061, - 0x006f: 0x0062, - 0x0070: 0x0063, - 0x0071: 0x0064, - 0x0072: 0x0065, - 0x0073: 0x0066, - 0x0074: 0x0067, - 0x0075: 0x0068, - 0x0076: 0x0069, - 
0x0077: 0x006a, - 0x0078: 0x006b, - 0x0079: 0x006c, - 0x007a: 0x006d, -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map) - -### Filter API - -def rot13(infile, outfile): - outfile.write(infile.read().encode('rot-13')) - -if __name__ == '__main__': - import sys - rot13(sys.stdin, sys.stdout) diff --git a/python/Lib/encodings/shift_jis.py b/python/Lib/encodings/shift_jis.py deleted file mode 100755 index 8338117276..0000000000 --- a/python/Lib/encodings/shift_jis.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# shift_jis.py: Python Unicode Codec for SHIFT_JIS -# -# Written by Hye-Shik Chang -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('shift_jis') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='shift_jis', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/shift_jis_2004.py b/python/Lib/encodings/shift_jis_2004.py deleted file mode 100755 index 161b1e86f9..0000000000 --- a/python/Lib/encodings/shift_jis_2004.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004 -# -# Written by Hye-Shik Chang -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('shift_jis_2004') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class 
IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='shift_jis_2004', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/shift_jisx0213.py b/python/Lib/encodings/shift_jisx0213.py deleted file mode 100755 index cb653f5305..0000000000 --- a/python/Lib/encodings/shift_jisx0213.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213 -# -# Written by Hye-Shik Chang -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('shift_jisx0213') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='shift_jisx0213', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/string_escape.py b/python/Lib/encodings/string_escape.py deleted file mode 100755 index e329a2607d..0000000000 --- 
a/python/Lib/encodings/string_escape.py +++ /dev/null @@ -1,38 +0,0 @@ -# -*- coding: iso-8859-1 -*- -""" Python 'escape' Codec - - -Written by Martin v. Löwis (martin@v.loewis.de). - -""" -import codecs - -class Codec(codecs.Codec): - - encode = codecs.escape_encode - decode = codecs.escape_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.escape_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.escape_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -def getregentry(): - return codecs.CodecInfo( - name='string-escape', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/tis_620.py b/python/Lib/encodings/tis_620.py deleted file mode 100755 index b2cd22b23d..0000000000 --- a/python/Lib/encodings/tis_620.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec tis_620 generated from 'python-mappings/TIS-620.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='tis-620', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 
0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' 
# 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> 
LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> - u'\x81' # 0x81 -> - u'\x82' # 0x82 -> - u'\x83' # 0x83 -> - u'\x84' # 0x84 -> - u'\x85' # 0x85 -> - u'\x86' # 0x86 -> - u'\x87' # 0x87 -> - u'\x88' # 0x88 -> - u'\x89' # 0x89 -> - u'\x8a' # 0x8A -> - u'\x8b' # 0x8B -> - u'\x8c' # 0x8C -> - u'\x8d' # 0x8D -> - u'\x8e' # 0x8E -> - u'\x8f' # 0x8F -> - u'\x90' # 0x90 -> - u'\x91' # 0x91 -> - u'\x92' # 0x92 -> - u'\x93' # 0x93 -> - u'\x94' # 0x94 -> - u'\x95' # 0x95 -> - u'\x96' # 0x96 -> - u'\x97' # 0x97 -> - u'\x98' # 0x98 -> - u'\x99' # 0x99 -> - u'\x9a' # 0x9A -> - u'\x9b' # 0x9B -> - u'\x9c' # 0x9C -> - u'\x9d' # 0x9D -> - u'\x9e' # 0x9E -> - u'\x9f' # 0x9F -> - u'\ufffe' - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xB4 -> 
THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI CHARACTER 
SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/python/Lib/encodings/undefined.py b/python/Lib/encodings/undefined.py deleted file mode 100755 index 4690288355..0000000000 --- a/python/Lib/encodings/undefined.py +++ /dev/null @@ -1,49 +0,0 @@ -""" Python 'undefined' Codec - - This codec will always raise a ValueError exception when being - used. It is intended for use by the site.py file to switch off - automatic string to Unicode coercion. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
- -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - raise UnicodeError("undefined encoding") - - def decode(self,input,errors='strict'): - raise UnicodeError("undefined encoding") - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - raise UnicodeError("undefined encoding") - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - raise UnicodeError("undefined encoding") - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='undefined', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/unicode_escape.py b/python/Lib/encodings/unicode_escape.py deleted file mode 100755 index 817f93265a..0000000000 --- a/python/Lib/encodings/unicode_escape.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Python 'unicode-escape' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. 
- encode = codecs.unicode_escape_encode - decode = codecs.unicode_escape_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.unicode_escape_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.unicode_escape_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='unicode-escape', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/unicode_internal.py b/python/Lib/encodings/unicode_internal.py deleted file mode 100755 index df3e7752d2..0000000000 --- a/python/Lib/encodings/unicode_internal.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Python 'unicode-internal' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. 
- encode = codecs.unicode_internal_encode - decode = codecs.unicode_internal_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.unicode_internal_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.unicode_internal_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='unicode-internal', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/python/Lib/encodings/utf_16.py b/python/Lib/encodings/utf_16.py deleted file mode 100755 index f3fadff615..0000000000 --- a/python/Lib/encodings/utf_16.py +++ /dev/null @@ -1,126 +0,0 @@ -""" Python 'utf-16' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
- -""" -import codecs, sys - -### Codec APIs - -encode = codecs.utf_16_encode - -def decode(input, errors='strict'): - return codecs.utf_16_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - codecs.IncrementalEncoder.__init__(self, errors) - self.encoder = None - - def encode(self, input, final=False): - if self.encoder is None: - result = codecs.utf_16_encode(input, self.errors)[0] - if sys.byteorder == 'little': - self.encoder = codecs.utf_16_le_encode - else: - self.encoder = codecs.utf_16_be_encode - return result - return self.encoder(input, self.errors)[0] - - def reset(self): - codecs.IncrementalEncoder.reset(self) - self.encoder = None - - def getstate(self): - # state info we return to the caller: - # 0: stream is in natural order for this platform - # 2: endianness hasn't been determined yet - # (we're never writing in unnatural order) - return (2 if self.encoder is None else 0) - - def setstate(self, state): - if state: - self.encoder = None - else: - if sys.byteorder == 'little': - self.encoder = codecs.utf_16_le_encode - else: - self.encoder = codecs.utf_16_be_encode - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def __init__(self, errors='strict'): - codecs.BufferedIncrementalDecoder.__init__(self, errors) - self.decoder = None - - def _buffer_decode(self, input, errors, final): - if self.decoder is None: - (output, consumed, byteorder) = \ - codecs.utf_16_ex_decode(input, errors, 0, final) - if byteorder == -1: - self.decoder = codecs.utf_16_le_decode - elif byteorder == 1: - self.decoder = codecs.utf_16_be_decode - elif consumed >= 2: - raise UnicodeError("UTF-16 stream does not start with BOM") - return (output, consumed) - return self.decoder(input, self.errors, final) - - def reset(self): - codecs.BufferedIncrementalDecoder.reset(self) - self.decoder = None - -class StreamWriter(codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - 
codecs.StreamWriter.__init__(self, stream, errors) - self.encoder = None - - def reset(self): - codecs.StreamWriter.reset(self) - self.encoder = None - - def encode(self, input, errors='strict'): - if self.encoder is None: - result = codecs.utf_16_encode(input, errors) - if sys.byteorder == 'little': - self.encoder = codecs.utf_16_le_encode - else: - self.encoder = codecs.utf_16_be_encode - return result - else: - return self.encoder(input, errors) - -class StreamReader(codecs.StreamReader): - - def reset(self): - codecs.StreamReader.reset(self) - try: - del self.decode - except AttributeError: - pass - - def decode(self, input, errors='strict'): - (object, consumed, byteorder) = \ - codecs.utf_16_ex_decode(input, errors, 0, False) - if byteorder == -1: - self.decode = codecs.utf_16_le_decode - elif byteorder == 1: - self.decode = codecs.utf_16_be_decode - elif consumed>=2: - raise UnicodeError,"UTF-16 stream does not start with BOM" - return (object, consumed) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-16', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_16_be.py b/python/Lib/encodings/utf_16_be.py deleted file mode 100755 index 86b458eb9b..0000000000 --- a/python/Lib/encodings/utf_16_be.py +++ /dev/null @@ -1,42 +0,0 @@ -""" Python 'utf-16-be' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
- -""" -import codecs - -### Codec APIs - -encode = codecs.utf_16_be_encode - -def decode(input, errors='strict'): - return codecs.utf_16_be_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_16_be_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_16_be_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_16_be_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_16_be_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-16-be', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_16_le.py b/python/Lib/encodings/utf_16_le.py deleted file mode 100755 index ec454142ee..0000000000 --- a/python/Lib/encodings/utf_16_le.py +++ /dev/null @@ -1,42 +0,0 @@ -""" Python 'utf-16-le' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
- -""" -import codecs - -### Codec APIs - -encode = codecs.utf_16_le_encode - -def decode(input, errors='strict'): - return codecs.utf_16_le_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_16_le_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_16_le_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_16_le_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_16_le_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-16-le', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_32.py b/python/Lib/encodings/utf_32.py deleted file mode 100755 index 067e1e201e..0000000000 --- a/python/Lib/encodings/utf_32.py +++ /dev/null @@ -1,150 +0,0 @@ -""" -Python 'utf-32' Codec -""" -import codecs, sys - -### Codec APIs - -encode = codecs.utf_32_encode - -def decode(input, errors='strict'): - return codecs.utf_32_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - codecs.IncrementalEncoder.__init__(self, errors) - self.encoder = None - - def encode(self, input, final=False): - if self.encoder is None: - result = codecs.utf_32_encode(input, self.errors)[0] - if sys.byteorder == 'little': - self.encoder = codecs.utf_32_le_encode - else: - self.encoder = codecs.utf_32_be_encode - return result - return self.encoder(input, self.errors)[0] - - def reset(self): - codecs.IncrementalEncoder.reset(self) - self.encoder = None - - def getstate(self): - # state info we return to the caller: - # 0: stream is in natural order for this platform - # 2: endianness hasn't been determined yet - # (we're never writing 
in unnatural order) - return (2 if self.encoder is None else 0) - - def setstate(self, state): - if state: - self.encoder = None - else: - if sys.byteorder == 'little': - self.encoder = codecs.utf_32_le_encode - else: - self.encoder = codecs.utf_32_be_encode - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def __init__(self, errors='strict'): - codecs.BufferedIncrementalDecoder.__init__(self, errors) - self.decoder = None - - def _buffer_decode(self, input, errors, final): - if self.decoder is None: - (output, consumed, byteorder) = \ - codecs.utf_32_ex_decode(input, errors, 0, final) - if byteorder == -1: - self.decoder = codecs.utf_32_le_decode - elif byteorder == 1: - self.decoder = codecs.utf_32_be_decode - elif consumed >= 4: - raise UnicodeError("UTF-32 stream does not start with BOM") - return (output, consumed) - return self.decoder(input, self.errors, final) - - def reset(self): - codecs.BufferedIncrementalDecoder.reset(self) - self.decoder = None - - def getstate(self): - # additional state info from the base class must be None here, - # as it isn't passed along to the caller - state = codecs.BufferedIncrementalDecoder.getstate(self)[0] - # additional state info we pass to the caller: - # 0: stream is in natural order for this platform - # 1: stream is in unnatural order - # 2: endianness hasn't been determined yet - if self.decoder is None: - return (state, 2) - addstate = int((sys.byteorder == "big") != - (self.decoder is codecs.utf_32_be_decode)) - return (state, addstate) - - def setstate(self, state): - # state[1] will be ignored by BufferedIncrementalDecoder.setstate() - codecs.BufferedIncrementalDecoder.setstate(self, state) - state = state[1] - if state == 0: - self.decoder = (codecs.utf_32_be_decode - if sys.byteorder == "big" - else codecs.utf_32_le_decode) - elif state == 1: - self.decoder = (codecs.utf_32_le_decode - if sys.byteorder == "big" - else codecs.utf_32_be_decode) - else: - self.decoder = None - -class 
StreamWriter(codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - self.encoder = None - codecs.StreamWriter.__init__(self, stream, errors) - - def reset(self): - codecs.StreamWriter.reset(self) - self.encoder = None - - def encode(self, input, errors='strict'): - if self.encoder is None: - result = codecs.utf_32_encode(input, errors) - if sys.byteorder == 'little': - self.encoder = codecs.utf_32_le_encode - else: - self.encoder = codecs.utf_32_be_encode - return result - else: - return self.encoder(input, errors) - -class StreamReader(codecs.StreamReader): - - def reset(self): - codecs.StreamReader.reset(self) - try: - del self.decode - except AttributeError: - pass - - def decode(self, input, errors='strict'): - (object, consumed, byteorder) = \ - codecs.utf_32_ex_decode(input, errors, 0, False) - if byteorder == -1: - self.decode = codecs.utf_32_le_decode - elif byteorder == 1: - self.decode = codecs.utf_32_be_decode - elif consumed>=4: - raise UnicodeError,"UTF-32 stream does not start with BOM" - return (object, consumed) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-32', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_32_be.py b/python/Lib/encodings/utf_32_be.py deleted file mode 100755 index fe272b5faf..0000000000 --- a/python/Lib/encodings/utf_32_be.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Python 'utf-32-be' Codec -""" -import codecs - -### Codec APIs - -encode = codecs.utf_32_be_encode - -def decode(input, errors='strict'): - return codecs.utf_32_be_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_32_be_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_32_be_decode - 
-class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_32_be_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_32_be_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-32-be', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_32_le.py b/python/Lib/encodings/utf_32_le.py deleted file mode 100755 index 9e48210928..0000000000 --- a/python/Lib/encodings/utf_32_le.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Python 'utf-32-le' Codec -""" -import codecs - -### Codec APIs - -encode = codecs.utf_32_le_encode - -def decode(input, errors='strict'): - return codecs.utf_32_le_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_32_le_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_32_le_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_32_le_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_32_le_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-32-le', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_7.py b/python/Lib/encodings/utf_7.py deleted file mode 100755 index 8e0567f208..0000000000 --- a/python/Lib/encodings/utf_7.py +++ /dev/null @@ -1,38 +0,0 @@ -""" Python 'utf-7' Codec - -Written by Brian Quinlan (brian@sweetapp.com). 
-""" -import codecs - -### Codec APIs - -encode = codecs.utf_7_encode - -def decode(input, errors='strict'): - return codecs.utf_7_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_7_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_7_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_7_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_7_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-7', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_8.py b/python/Lib/encodings/utf_8.py deleted file mode 100755 index 1bf6336571..0000000000 --- a/python/Lib/encodings/utf_8.py +++ /dev/null @@ -1,42 +0,0 @@ -""" Python 'utf-8' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
- -""" -import codecs - -### Codec APIs - -encode = codecs.utf_8_encode - -def decode(input, errors='strict'): - return codecs.utf_8_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_8_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_8_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_8_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_8_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-8', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/utf_8_sig.py b/python/Lib/encodings/utf_8_sig.py deleted file mode 100755 index 8784694f02..0000000000 --- a/python/Lib/encodings/utf_8_sig.py +++ /dev/null @@ -1,117 +0,0 @@ -""" Python 'utf-8-sig' Codec -This work similar to UTF-8 with the following changes: - -* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the - first three bytes. - -* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these - bytes will be skipped. 
-""" -import codecs - -### Codec APIs - -def encode(input, errors='strict'): - return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input)) - -def decode(input, errors='strict'): - prefix = 0 - if input[:3] == codecs.BOM_UTF8: - input = input[3:] - prefix = 3 - (output, consumed) = codecs.utf_8_decode(input, errors, True) - return (output, consumed+prefix) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - codecs.IncrementalEncoder.__init__(self, errors) - self.first = 1 - - def encode(self, input, final=False): - if self.first: - self.first = 0 - return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0] - else: - return codecs.utf_8_encode(input, self.errors)[0] - - def reset(self): - codecs.IncrementalEncoder.reset(self) - self.first = 1 - - def getstate(self): - return self.first - - def setstate(self, state): - self.first = state - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def __init__(self, errors='strict'): - codecs.BufferedIncrementalDecoder.__init__(self, errors) - self.first = True - - def _buffer_decode(self, input, errors, final): - if self.first: - if len(input) < 3: - if codecs.BOM_UTF8.startswith(input): - # not enough data to decide if this really is a BOM - # => try again on the next call - return (u"", 0) - else: - self.first = None - else: - self.first = None - if input[:3] == codecs.BOM_UTF8: - (output, consumed) = codecs.utf_8_decode(input[3:], errors, final) - return (output, consumed+3) - return codecs.utf_8_decode(input, errors, final) - - def reset(self): - codecs.BufferedIncrementalDecoder.reset(self) - self.first = True - -class StreamWriter(codecs.StreamWriter): - def reset(self): - codecs.StreamWriter.reset(self) - try: - del self.encode - except AttributeError: - pass - - def encode(self, input, errors='strict'): - self.encode = codecs.utf_8_encode - return encode(input, errors) - -class StreamReader(codecs.StreamReader): - def reset(self): - 
codecs.StreamReader.reset(self) - try: - del self.decode - except AttributeError: - pass - - def decode(self, input, errors='strict'): - if len(input) < 3: - if codecs.BOM_UTF8.startswith(input): - # not enough data to decide if this is a BOM - # => try again on the next call - return (u"", 0) - elif input[:3] == codecs.BOM_UTF8: - self.decode = codecs.utf_8_decode - (output, consumed) = codecs.utf_8_decode(input[3:],errors) - return (output, consumed+3) - # (else) no BOM present - self.decode = codecs.utf_8_decode - return codecs.utf_8_decode(input, errors) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-8-sig', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/python/Lib/encodings/uu_codec.py b/python/Lib/encodings/uu_codec.py deleted file mode 100755 index 5cb0d2b13e..0000000000 --- a/python/Lib/encodings/uu_codec.py +++ /dev/null @@ -1,130 +0,0 @@ -""" Python 'uu_codec' Codec - UU content transfer encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were - adapted from uu.py which was written by Lance Ellinghouse and - modified by Jack Jansen and Fredrik Lundh. - -""" -import codecs, binascii - -### Codec APIs - -def uu_encode(input,errors='strict',filename='',mode=0666): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - from cStringIO import StringIO - from binascii import b2a_uu - # using str() because of cStringIO's Unicode undesired Unicode behavior. 
- infile = StringIO(str(input)) - outfile = StringIO() - read = infile.read - write = outfile.write - - # Encode - write('begin %o %s\n' % (mode & 0777, filename)) - chunk = read(45) - while chunk: - write(b2a_uu(chunk)) - chunk = read(45) - write(' \nend\n') - - return (outfile.getvalue(), len(input)) - -def uu_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - Note: filename and file mode information in the input data is - ignored. - - """ - assert errors == 'strict' - from cStringIO import StringIO - from binascii import a2b_uu - infile = StringIO(str(input)) - outfile = StringIO() - readline = infile.readline - write = outfile.write - - # Find start of encoded data - while 1: - s = readline() - if not s: - raise ValueError, 'Missing "begin" line in input data' - if s[:5] == 'begin': - break - - # Decode - while 1: - s = readline() - if not s or \ - s == 'end\n': - break - try: - data = a2b_uu(s) - except binascii.Error, v: - # Workaround for broken uuencoders by /Fredrik Lundh - nbytes = (((ord(s[0])-32) & 63) * 4 + 5) // 3 - data = a2b_uu(s[:nbytes]) - #sys.stderr.write("Warning: %s\n" % str(v)) - write(data) - if not s: - raise ValueError, 'Truncated input data' - - return (outfile.getvalue(), len(input)) - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return uu_encode(input,errors) - - def decode(self,input,errors='strict'): - return uu_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return uu_encode(input, self.errors)[0] - -class 
IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return uu_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='uu', - encode=uu_encode, - decode=uu_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - _is_text_encoding=False, - ) diff --git a/python/Lib/encodings/zlib_codec.py b/python/Lib/encodings/zlib_codec.py deleted file mode 100755 index 0c2599d401..0000000000 --- a/python/Lib/encodings/zlib_codec.py +++ /dev/null @@ -1,103 +0,0 @@ -""" Python 'zlib_codec' Codec - zlib compression encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). - -""" -import codecs -import zlib # this codec needs the optional zlib module ! - -### Codec APIs - -def zlib_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = zlib.compress(input) - return (output, len(input)) - -def zlib_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. 
- - """ - assert errors == 'strict' - output = zlib.decompress(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input, errors='strict'): - return zlib_encode(input, errors) - def decode(self, input, errors='strict'): - return zlib_decode(input, errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.compressobj = zlib.compressobj() - - def encode(self, input, final=False): - if final: - c = self.compressobj.compress(input) - return c + self.compressobj.flush() - else: - return self.compressobj.compress(input) - - def reset(self): - self.compressobj = zlib.compressobj() - -class IncrementalDecoder(codecs.IncrementalDecoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.decompressobj = zlib.decompressobj() - - def decode(self, input, final=False): - if final: - c = self.decompressobj.decompress(input) - return c + self.decompressobj.flush() - else: - return self.decompressobj.decompress(input) - - def reset(self): - self.decompressobj = zlib.decompressobj() - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='zlib', - encode=zlib_encode, - decode=zlib_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - _is_text_encoding=False, - ) diff --git a/python/Lib/ensurepip/__init__.py b/python/Lib/ensurepip/__init__.py deleted file mode 100644 index c2abed84ef..0000000000 --- a/python/Lib/ensurepip/__init__.py +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/env python2 -from __future__ import print_function - -import os -import os.path -import pkgutil -import shutil -import sys -import tempfile - - -__all__ = ["version", "bootstrap"] - - 
-_SETUPTOOLS_VERSION = "28.8.0" - -_PIP_VERSION = "9.0.1" - -_PROJECTS = [ - ("setuptools", _SETUPTOOLS_VERSION), - ("pip", _PIP_VERSION), -] - - -def _run_pip(args, additional_paths=None): - # Add our bundled software to the sys.path so we can import it - if additional_paths is not None: - sys.path = additional_paths + sys.path - - # Install the bundled software - import pip - pip.main(args) - - -def version(): - """ - Returns a string specifying the bundled version of pip. - """ - return _PIP_VERSION - - -def _disable_pip_configuration_settings(): - # We deliberately ignore all pip environment variables - # when invoking pip - # See http://bugs.python.org/issue19734 for details - keys_to_remove = [k for k in os.environ if k.startswith("PIP_")] - for k in keys_to_remove: - del os.environ[k] - # We also ignore the settings in the default pip configuration file - # See http://bugs.python.org/issue20053 for details - os.environ['PIP_CONFIG_FILE'] = os.devnull - - -def bootstrap(root=None, upgrade=False, user=False, - altinstall=False, default_pip=True, - verbosity=0): - """ - Bootstrap pip into the current Python installation (or the given root - directory). - - Note that calling this function will alter both sys.path and os.environ. 
- """ - if altinstall and default_pip: - raise ValueError("Cannot use altinstall and default_pip together") - - _disable_pip_configuration_settings() - - # By default, installing pip and setuptools installs all of the - # following scripts (X.Y == running Python version): - # - # pip, pipX, pipX.Y, easy_install, easy_install-X.Y - # - # pip 1.5+ allows ensurepip to request that some of those be left out - if altinstall: - # omit pip, pipX and easy_install - os.environ["ENSUREPIP_OPTIONS"] = "altinstall" - elif not default_pip: - # omit pip and easy_install - os.environ["ENSUREPIP_OPTIONS"] = "install" - - tmpdir = tempfile.mkdtemp() - try: - # Put our bundled wheels into a temporary directory and construct the - # additional paths that need added to sys.path - additional_paths = [] - for project, version in _PROJECTS: - wheel_name = "{}-{}-py2.py3-none-any.whl".format(project, version) - whl = pkgutil.get_data( - "ensurepip", - "_bundled/{}".format(wheel_name), - ) - with open(os.path.join(tmpdir, wheel_name), "wb") as fp: - fp.write(whl) - - additional_paths.append(os.path.join(tmpdir, wheel_name)) - - # Construct the arguments to be passed to the pip command - args = ["install", "--no-index", "--find-links", tmpdir] - if root: - args += ["--root", root] - if upgrade: - args += ["--upgrade"] - if user: - args += ["--user"] - if verbosity: - args += ["-" + "v" * verbosity] - - _run_pip(args + [p[0] for p in _PROJECTS], additional_paths) - finally: - shutil.rmtree(tmpdir, ignore_errors=True) - - -def _uninstall_helper(verbosity=0): - """Helper to support a clean default uninstall process on Windows - - Note that calling this function may alter os.environ. 
- """ - # Nothing to do if pip was never installed, or has been removed - try: - import pip - except ImportError: - return - - # If the pip version doesn't match the bundled one, leave it alone - if pip.__version__ != _PIP_VERSION: - msg = ("ensurepip will only uninstall a matching version " - "({!r} installed, {!r} bundled)") - print(msg.format(pip.__version__, _PIP_VERSION), file=sys.stderr) - return - - _disable_pip_configuration_settings() - - # Construct the arguments to be passed to the pip command - args = ["uninstall", "-y", "--disable-pip-version-check"] - if verbosity: - args += ["-" + "v" * verbosity] - - _run_pip(args + [p[0] for p in reversed(_PROJECTS)]) - - -def _main(argv=None): - import argparse - parser = argparse.ArgumentParser(prog="python -m ensurepip") - parser.add_argument( - "--version", - action="version", - version="pip {}".format(version()), - help="Show the version of pip that is bundled with this Python.", - ) - parser.add_argument( - "-v", "--verbose", - action="count", - default=0, - dest="verbosity", - help=("Give more output. 
Option is additive, and can be used up to 3 " - "times."), - ) - parser.add_argument( - "-U", "--upgrade", - action="store_true", - default=False, - help="Upgrade pip and dependencies, even if already installed.", - ) - parser.add_argument( - "--user", - action="store_true", - default=False, - help="Install using the user scheme.", - ) - parser.add_argument( - "--root", - default=None, - help="Install everything relative to this alternate root directory.", - ) - parser.add_argument( - "--altinstall", - action="store_true", - default=False, - help=("Make an alternate install, installing only the X.Y versioned" - "scripts (Default: pipX, pipX.Y, easy_install-X.Y)"), - ) - parser.add_argument( - "--default-pip", - action="store_true", - default=True, - dest="default_pip", - help=argparse.SUPPRESS, - ) - parser.add_argument( - "--no-default-pip", - action="store_false", - dest="default_pip", - help=("Make a non default install, installing only the X and X.Y " - "versioned scripts."), - ) - - args = parser.parse_args(argv) - - bootstrap( - root=args.root, - upgrade=args.upgrade, - user=args.user, - verbosity=args.verbosity, - altinstall=args.altinstall, - default_pip=args.default_pip, - ) diff --git a/python/Lib/ensurepip/__main__.py b/python/Lib/ensurepip/__main__.py deleted file mode 100644 index 77527d7a35..0000000000 --- a/python/Lib/ensurepip/__main__.py +++ /dev/null @@ -1,4 +0,0 @@ -import ensurepip - -if __name__ == "__main__": - ensurepip._main() diff --git a/python/Lib/ensurepip/_bundled/pip-9.0.1-py2.py3-none-any.whl b/python/Lib/ensurepip/_bundled/pip-9.0.1-py2.py3-none-any.whl deleted file mode 100644 index 4b8ecc69db..0000000000 Binary files a/python/Lib/ensurepip/_bundled/pip-9.0.1-py2.py3-none-any.whl and /dev/null differ diff --git a/python/Lib/ensurepip/_bundled/setuptools-28.8.0-py2.py3-none-any.whl b/python/Lib/ensurepip/_bundled/setuptools-28.8.0-py2.py3-none-any.whl deleted file mode 100644 index 502e3cb418..0000000000 Binary files 
a/python/Lib/ensurepip/_bundled/setuptools-28.8.0-py2.py3-none-any.whl and /dev/null differ diff --git a/python/Lib/ensurepip/_uninstall.py b/python/Lib/ensurepip/_uninstall.py deleted file mode 100644 index 750365ec4d..0000000000 --- a/python/Lib/ensurepip/_uninstall.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Basic pip uninstallation support, helper for the Windows uninstaller""" - -import argparse -import ensurepip - - -def _main(argv=None): - parser = argparse.ArgumentParser(prog="python -m ensurepip._uninstall") - parser.add_argument( - "--version", - action="version", - version="pip {}".format(ensurepip.version()), - help="Show the version of pip this will attempt to uninstall.", - ) - parser.add_argument( - "-v", "--verbose", - action="count", - default=0, - dest="verbosity", - help=("Give more output. Option is additive, and can be used up to 3 " - "times."), - ) - - args = parser.parse_args(argv) - - ensurepip._uninstall_helper(verbosity=args.verbosity) - - -if __name__ == "__main__": - _main() diff --git a/python/Lib/filecmp.py b/python/Lib/filecmp.py deleted file mode 100755 index 3a793819c6..0000000000 --- a/python/Lib/filecmp.py +++ /dev/null @@ -1,296 +0,0 @@ -"""Utilities for comparing files and directories. - -Classes: - dircmp - -Functions: - cmp(f1, f2, shallow=1) -> int - cmpfiles(a, b, common) -> ([], [], []) - -""" - -import os -import stat -from itertools import ifilter, ifilterfalse, imap, izip - -__all__ = ["cmp","dircmp","cmpfiles"] - -_cache = {} -BUFSIZE=8*1024 - -def cmp(f1, f2, shallow=1): - """Compare two files. - - Arguments: - - f1 -- First file name - - f2 -- Second file name - - shallow -- Just check stat signature (do not read the files). - defaults to 1. - - Return value: - - True if the files are the same, False otherwise. - - This function uses a cache for past comparisons and the results, - with a cache invalidation mechanism relying on stale signatures. 
- - """ - - s1 = _sig(os.stat(f1)) - s2 = _sig(os.stat(f2)) - if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG: - return False - if shallow and s1 == s2: - return True - if s1[1] != s2[1]: - return False - - outcome = _cache.get((f1, f2, s1, s2)) - if outcome is None: - outcome = _do_cmp(f1, f2) - if len(_cache) > 100: # limit the maximum size of the cache - _cache.clear() - _cache[f1, f2, s1, s2] = outcome - return outcome - -def _sig(st): - return (stat.S_IFMT(st.st_mode), - st.st_size, - st.st_mtime) - -def _do_cmp(f1, f2): - bufsize = BUFSIZE - with open(f1, 'rb') as fp1, open(f2, 'rb') as fp2: - while True: - b1 = fp1.read(bufsize) - b2 = fp2.read(bufsize) - if b1 != b2: - return False - if not b1: - return True - -# Directory comparison class. -# -class dircmp: - """A class that manages the comparison of 2 directories. - - dircmp(a,b,ignore=None,hide=None) - A and B are directories. - IGNORE is a list of names to ignore, - defaults to ['RCS', 'CVS', 'tags']. - HIDE is a list of names to hide, - defaults to [os.curdir, os.pardir]. - - High level usage: - x = dircmp(dir1, dir2) - x.report() -> prints a report on the differences between dir1 and dir2 - or - x.report_partial_closure() -> prints report on differences between dir1 - and dir2, and reports on common immediate subdirectories. - x.report_full_closure() -> like report_partial_closure, - but fully recursive. - - Attributes: - left_list, right_list: The files in dir1 and dir2, - filtered by hide and ignore. - common: a list of names in both dir1 and dir2. - left_only, right_only: names only in dir1, dir2. - common_dirs: subdirectories in both dir1 and dir2. - common_files: files in both dir1 and dir2. - common_funny: names in both dir1 and dir2 where the type differs between - dir1 and dir2, or the name is not stat-able. - same_files: list of identical files. - diff_files: list of filenames which differ. - funny_files: list of files which could not be compared. 
- subdirs: a dictionary of dircmp objects, keyed by names in common_dirs. - """ - - def __init__(self, a, b, ignore=None, hide=None): # Initialize - self.left = a - self.right = b - if hide is None: - self.hide = [os.curdir, os.pardir] # Names never to be shown - else: - self.hide = hide - if ignore is None: - self.ignore = ['RCS', 'CVS', 'tags'] # Names ignored in comparison - else: - self.ignore = ignore - - def phase0(self): # Compare everything except common subdirectories - self.left_list = _filter(os.listdir(self.left), - self.hide+self.ignore) - self.right_list = _filter(os.listdir(self.right), - self.hide+self.ignore) - self.left_list.sort() - self.right_list.sort() - - def phase1(self): # Compute common names - a = dict(izip(imap(os.path.normcase, self.left_list), self.left_list)) - b = dict(izip(imap(os.path.normcase, self.right_list), self.right_list)) - self.common = map(a.__getitem__, ifilter(b.__contains__, a)) - self.left_only = map(a.__getitem__, ifilterfalse(b.__contains__, a)) - self.right_only = map(b.__getitem__, ifilterfalse(a.__contains__, b)) - - def phase2(self): # Distinguish files, directories, funnies - self.common_dirs = [] - self.common_files = [] - self.common_funny = [] - - for x in self.common: - a_path = os.path.join(self.left, x) - b_path = os.path.join(self.right, x) - - ok = 1 - try: - a_stat = os.stat(a_path) - except os.error, why: - # print 'Can\'t stat', a_path, ':', why[1] - ok = 0 - try: - b_stat = os.stat(b_path) - except os.error, why: - # print 'Can\'t stat', b_path, ':', why[1] - ok = 0 - - if ok: - a_type = stat.S_IFMT(a_stat.st_mode) - b_type = stat.S_IFMT(b_stat.st_mode) - if a_type != b_type: - self.common_funny.append(x) - elif stat.S_ISDIR(a_type): - self.common_dirs.append(x) - elif stat.S_ISREG(a_type): - self.common_files.append(x) - else: - self.common_funny.append(x) - else: - self.common_funny.append(x) - - def phase3(self): # Find out differences between common files - xx = cmpfiles(self.left, self.right, 
self.common_files) - self.same_files, self.diff_files, self.funny_files = xx - - def phase4(self): # Find out differences between common subdirectories - # A new dircmp object is created for each common subdirectory, - # these are stored in a dictionary indexed by filename. - # The hide and ignore properties are inherited from the parent - self.subdirs = {} - for x in self.common_dirs: - a_x = os.path.join(self.left, x) - b_x = os.path.join(self.right, x) - self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide) - - def phase4_closure(self): # Recursively call phase4() on subdirectories - self.phase4() - for sd in self.subdirs.itervalues(): - sd.phase4_closure() - - def report(self): # Print a report on the differences between a and b - # Output format is purposely lousy - print 'diff', self.left, self.right - if self.left_only: - self.left_only.sort() - print 'Only in', self.left, ':', self.left_only - if self.right_only: - self.right_only.sort() - print 'Only in', self.right, ':', self.right_only - if self.same_files: - self.same_files.sort() - print 'Identical files :', self.same_files - if self.diff_files: - self.diff_files.sort() - print 'Differing files :', self.diff_files - if self.funny_files: - self.funny_files.sort() - print 'Trouble with common files :', self.funny_files - if self.common_dirs: - self.common_dirs.sort() - print 'Common subdirectories :', self.common_dirs - if self.common_funny: - self.common_funny.sort() - print 'Common funny cases :', self.common_funny - - def report_partial_closure(self): # Print reports on self and on subdirs - self.report() - for sd in self.subdirs.itervalues(): - print - sd.report() - - def report_full_closure(self): # Report on self and subdirs recursively - self.report() - for sd in self.subdirs.itervalues(): - print - sd.report_full_closure() - - methodmap = dict(subdirs=phase4, - same_files=phase3, diff_files=phase3, funny_files=phase3, - common_dirs = phase2, common_files=phase2, common_funny=phase2, - 
common=phase1, left_only=phase1, right_only=phase1, - left_list=phase0, right_list=phase0) - - def __getattr__(self, attr): - if attr not in self.methodmap: - raise AttributeError, attr - self.methodmap[attr](self) - return getattr(self, attr) - -def cmpfiles(a, b, common, shallow=1): - """Compare common files in two directories. - - a, b -- directory names - common -- list of file names found in both directories - shallow -- if true, do comparison based solely on stat() information - - Returns a tuple of three lists: - files that compare equal - files that are different - filenames that aren't regular files. - - """ - res = ([], [], []) - for x in common: - ax = os.path.join(a, x) - bx = os.path.join(b, x) - res[_cmp(ax, bx, shallow)].append(x) - return res - - -# Compare two files. -# Return: -# 0 for equal -# 1 for different -# 2 for funny cases (can't stat, etc.) -# -def _cmp(a, b, sh, abs=abs, cmp=cmp): - try: - return not abs(cmp(a, b, sh)) - except (os.error, IOError): - return 2 - - -# Return a copy with items that occur in skip removed. -# -def _filter(flist, skip): - return list(ifilterfalse(skip.__contains__, flist)) - - -# Demonstration and testing. -# -def demo(): - import sys - import getopt - options, args = getopt.getopt(sys.argv[1:], 'r') - if len(args) != 2: - raise getopt.GetoptError('need exactly two args', None) - dd = dircmp(args[0], args[1]) - if ('-r', '') in options: - dd.report_full_closure() - else: - dd.report() - -if __name__ == '__main__': - demo() diff --git a/python/Lib/fileinput.py b/python/Lib/fileinput.py deleted file mode 100755 index b2e2f05030..0000000000 --- a/python/Lib/fileinput.py +++ /dev/null @@ -1,405 +0,0 @@ -"""Helper class to quickly write a loop over all standard input files. - -Typical use is: - - import fileinput - for line in fileinput.input(): - process(line) - -This iterates over the lines of all files listed in sys.argv[1:], -defaulting to sys.stdin if the list is empty. 
If a filename is '-' it -is also replaced by sys.stdin. To specify an alternative list of -filenames, pass it as the argument to input(). A single file name is -also allowed. - -Functions filename(), lineno() return the filename and cumulative line -number of the line that has just been read; filelineno() returns its -line number in the current file; isfirstline() returns true iff the -line just read is the first line of its file; isstdin() returns true -iff the line was read from sys.stdin. Function nextfile() closes the -current file so that the next iteration will read the first line from -the next file (if any); lines not read from the file will not count -towards the cumulative line count; the filename is not changed until -after the first line of the next file has been read. Function close() -closes the sequence. - -Before any lines have been read, filename() returns None and both line -numbers are zero; nextfile() has no effect. After all lines have been -read, filename() and the line number functions return the values -pertaining to the last line read; nextfile() has no effect. - -All files are opened in text mode by default, you can override this by -setting the mode parameter to input() or FileInput.__init__(). -If an I/O error occurs during opening or reading a file, the IOError -exception is raised. - -If sys.stdin is used more than once, the second and further use will -return no lines, except perhaps for interactive use, or if it has been -explicitly reset (e.g. using sys.stdin.seek(0)). - -Empty files are opened and immediately closed; the only time their -presence in the list of filenames is noticeable at all is when the -last file opened is empty. - -It is possible that the last line of a file doesn't end in a newline -character; otherwise lines are returned including the trailing -newline. 
- -Class FileInput is the implementation; its methods filename(), -lineno(), fileline(), isfirstline(), isstdin(), nextfile() and close() -correspond to the functions in the module. In addition it has a -readline() method which returns the next input line, and a -__getitem__() method which implements the sequence behavior. The -sequence must be accessed in strictly sequential order; sequence -access and readline() cannot be mixed. - -Optional in-place filtering: if the keyword argument inplace=1 is -passed to input() or to the FileInput constructor, the file is moved -to a backup file and standard output is directed to the input file. -This makes it possible to write a filter that rewrites its input file -in place. If the keyword argument backup="." is also -given, it specifies the extension for the backup file, and the backup -file remains around; by default, the extension is ".bak" and it is -deleted when the output file is closed. In-place filtering is -disabled when standard input is read. XXX The current implementation -does not work for MS-DOS 8+3 filesystems. - -XXX Possible additions: - -- optional getopt argument processing -- isatty() -- read(), read(size), even readlines() - -""" - -import sys, os - -__all__ = ["input","close","nextfile","filename","lineno","filelineno", - "isfirstline","isstdin","FileInput"] - -_state = None - -# No longer used -DEFAULT_BUFSIZE = 8*1024 - -def input(files=None, inplace=0, backup="", bufsize=0, - mode="r", openhook=None): - """Return an instance of the FileInput class, which can be iterated. - - The parameters are passed to the constructor of the FileInput class. - The returned instance, in addition to being an iterator, - keeps global state for the functions of this module,. 
- """ - global _state - if _state and _state._file: - raise RuntimeError, "input() already active" - _state = FileInput(files, inplace, backup, bufsize, mode, openhook) - return _state - -def close(): - """Close the sequence.""" - global _state - state = _state - _state = None - if state: - state.close() - -def nextfile(): - """ - Close the current file so that the next iteration will read the first - line from the next file (if any); lines not read from the file will - not count towards the cumulative line count. The filename is not - changed until after the first line of the next file has been read. - Before the first line has been read, this function has no effect; - it cannot be used to skip the first file. After the last line of the - last file has been read, this function has no effect. - """ - if not _state: - raise RuntimeError, "no active input()" - return _state.nextfile() - -def filename(): - """ - Return the name of the file currently being read. - Before the first line has been read, returns None. - """ - if not _state: - raise RuntimeError, "no active input()" - return _state.filename() - -def lineno(): - """ - Return the cumulative line number of the line that has just been read. - Before the first line has been read, returns 0. After the last line - of the last file has been read, returns the line number of that line. - """ - if not _state: - raise RuntimeError, "no active input()" - return _state.lineno() - -def filelineno(): - """ - Return the line number in the current file. Before the first line - has been read, returns 0. After the last line of the last file has - been read, returns the line number of that line within the file. - """ - if not _state: - raise RuntimeError, "no active input()" - return _state.filelineno() - -def fileno(): - """ - Return the file number of the current file. When no file is currently - opened, returns -1. 
- """ - if not _state: - raise RuntimeError, "no active input()" - return _state.fileno() - -def isfirstline(): - """ - Returns true the line just read is the first line of its file, - otherwise returns false. - """ - if not _state: - raise RuntimeError, "no active input()" - return _state.isfirstline() - -def isstdin(): - """ - Returns true if the last line was read from sys.stdin, - otherwise returns false. - """ - if not _state: - raise RuntimeError, "no active input()" - return _state.isstdin() - -class FileInput: - """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]]) - - Class FileInput is the implementation of the module; its methods - filename(), lineno(), fileline(), isfirstline(), isstdin(), fileno(), - nextfile() and close() correspond to the functions of the same name - in the module. - In addition it has a readline() method which returns the next - input line, and a __getitem__() method which implements the - sequence behavior. The sequence must be accessed in strictly - sequential order; random access and readline() cannot be mixed. 
- """ - - def __init__(self, files=None, inplace=0, backup="", bufsize=0, - mode="r", openhook=None): - if isinstance(files, basestring): - files = (files,) - else: - if files is None: - files = sys.argv[1:] - if not files: - files = ('-',) - else: - files = tuple(files) - self._files = files - self._inplace = inplace - self._backup = backup - self._savestdout = None - self._output = None - self._filename = None - self._startlineno = 0 - self._filelineno = 0 - self._file = None - self._isstdin = False - self._backupfilename = None - # restrict mode argument to reading modes - if mode not in ('r', 'rU', 'U', 'rb'): - raise ValueError("FileInput opening mode must be one of " - "'r', 'rU', 'U' and 'rb'") - self._mode = mode - if inplace and openhook: - raise ValueError("FileInput cannot use an opening hook in inplace mode") - elif openhook and not hasattr(openhook, '__call__'): - raise ValueError("FileInput openhook must be callable") - self._openhook = openhook - - def __del__(self): - self.close() - - def close(self): - try: - self.nextfile() - finally: - self._files = () - - def __iter__(self): - return self - - def next(self): - while 1: - line = self._readline() - if line: - self._filelineno += 1 - return line - if not self._file: - raise StopIteration - self.nextfile() - # repeat with next file - - def __getitem__(self, i): - if i != self.lineno(): - raise RuntimeError, "accessing lines out of order" - try: - return self.next() - except StopIteration: - raise IndexError, "end of input reached" - - def nextfile(self): - savestdout = self._savestdout - self._savestdout = 0 - if savestdout: - sys.stdout = savestdout - - output = self._output - self._output = 0 - try: - if output: - output.close() - finally: - file = self._file - self._file = None - try: - del self._readline # restore FileInput._readline - except AttributeError: - pass - try: - if file and not self._isstdin: - file.close() - finally: - backupfilename = self._backupfilename - self._backupfilename = 0 
- if backupfilename and not self._backup: - try: os.unlink(backupfilename) - except OSError: pass - - self._isstdin = False - - def readline(self): - while 1: - line = self._readline() - if line: - self._filelineno += 1 - return line - if not self._file: - return line - self.nextfile() - # repeat with next file - - def _readline(self): - if not self._files: - return "" - self._filename = self._files[0] - self._files = self._files[1:] - self._startlineno = self.lineno() - self._filelineno = 0 - self._file = None - self._isstdin = False - self._backupfilename = 0 - if self._filename == '-': - self._filename = '' - self._file = sys.stdin - self._isstdin = True - else: - if self._inplace: - self._backupfilename = ( - self._filename + (self._backup or os.extsep+"bak")) - try: os.unlink(self._backupfilename) - except os.error: pass - # The next few lines may raise IOError - os.rename(self._filename, self._backupfilename) - self._file = open(self._backupfilename, self._mode) - try: - perm = os.fstat(self._file.fileno()).st_mode - except OSError: - self._output = open(self._filename, "w") - else: - fd = os.open(self._filename, - os.O_CREAT | os.O_WRONLY | os.O_TRUNC, - perm) - self._output = os.fdopen(fd, "w") - try: - if hasattr(os, 'chmod'): - os.chmod(self._filename, perm) - except OSError: - pass - self._savestdout = sys.stdout - sys.stdout = self._output - else: - # This may raise IOError - if self._openhook: - self._file = self._openhook(self._filename, self._mode) - else: - self._file = open(self._filename, self._mode) - - self._readline = self._file.readline # hide FileInput._readline - return self._readline() - - def filename(self): - return self._filename - - def lineno(self): - return self._startlineno + self._filelineno - - def filelineno(self): - return self._filelineno - - def fileno(self): - if self._file: - try: - return self._file.fileno() - except ValueError: - return -1 - else: - return -1 - - def isfirstline(self): - return self._filelineno == 1 - - def 
isstdin(self): - return self._isstdin - - -def hook_compressed(filename, mode): - ext = os.path.splitext(filename)[1] - if ext == '.gz': - import gzip - return gzip.open(filename, mode) - elif ext == '.bz2': - import bz2 - return bz2.BZ2File(filename, mode) - else: - return open(filename, mode) - - -def hook_encoded(encoding): - import io - def openhook(filename, mode): - mode = mode.replace('U', '').replace('b', '') or 'r' - return io.open(filename, mode, encoding=encoding, newline='') - return openhook - - -def _test(): - import getopt - inplace = 0 - backup = 0 - opts, args = getopt.getopt(sys.argv[1:], "ib:") - for o, a in opts: - if o == '-i': inplace = 1 - if o == '-b': backup = a - for line in input(args, inplace=inplace, backup=backup): - if line[-1:] == '\n': line = line[:-1] - if line[-1:] == '\r': line = line[:-1] - print "%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(), - isfirstline() and "*" or "", line) - print "%d: %s[%d]" % (lineno(), filename(), filelineno()) - -if __name__ == '__main__': - _test() diff --git a/python/Lib/fnmatch.py b/python/Lib/fnmatch.py deleted file mode 100755 index 99002e6e6b..0000000000 --- a/python/Lib/fnmatch.py +++ /dev/null @@ -1,120 +0,0 @@ -"""Filename matching with shell patterns. - -fnmatch(FILENAME, PATTERN) matches according to the local convention. -fnmatchcase(FILENAME, PATTERN) always takes case in account. - -The functions operate by translating the pattern into a regular -expression. They cache the compiled regular expressions for speed. - -The function translate(PATTERN) returns a regular expression -corresponding to PATTERN. (It does not compile it.) -""" - -import re - -__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] - -_cache = {} -_MAXCACHE = 100 - -def _purge(): - """Clear the pattern cache""" - _cache.clear() - -def fnmatch(name, pat): - """Test whether FILENAME matches PATTERN. - - Patterns are Unix shell style: - - * matches everything - ? 
matches any single character - [seq] matches any character in seq - [!seq] matches any char not in seq - - An initial period in FILENAME is not special. - Both FILENAME and PATTERN are first case-normalized - if the operating system requires it. - If you don't want this, use fnmatchcase(FILENAME, PATTERN). - """ - - import os - name = os.path.normcase(name) - pat = os.path.normcase(pat) - return fnmatchcase(name, pat) - -def filter(names, pat): - """Return the subset of the list NAMES that match PAT""" - import os,posixpath - result=[] - pat=os.path.normcase(pat) - try: - re_pat = _cache[pat] - except KeyError: - res = translate(pat) - if len(_cache) >= _MAXCACHE: - _cache.clear() - _cache[pat] = re_pat = re.compile(res) - match = re_pat.match - if os.path is posixpath: - # normcase on posix is NOP. Optimize it away from the loop. - for name in names: - if match(name): - result.append(name) - else: - for name in names: - if match(os.path.normcase(name)): - result.append(name) - return result - -def fnmatchcase(name, pat): - """Test whether FILENAME matches PATTERN, including case. - - This is a version of fnmatch() which doesn't case-normalize - its arguments. - """ - - try: - re_pat = _cache[pat] - except KeyError: - res = translate(pat) - if len(_cache) >= _MAXCACHE: - _cache.clear() - _cache[pat] = re_pat = re.compile(res) - return re_pat.match(name) is not None - -def translate(pat): - """Translate a shell PATTERN to a regular expression. - - There is no way to quote meta-characters. - """ - - i, n = 0, len(pat) - res = '' - while i < n: - c = pat[i] - i = i+1 - if c == '*': - res = res + '.*' - elif c == '?': - res = res + '.' 
- elif c == '[': - j = i - if j < n and pat[j] == '!': - j = j+1 - if j < n and pat[j] == ']': - j = j+1 - while j < n and pat[j] != ']': - j = j+1 - if j >= n: - res = res + '\\[' - else: - stuff = pat[i:j].replace('\\','\\\\') - i = j+1 - if stuff[0] == '!': - stuff = '^' + stuff[1:] - elif stuff[0] == '^': - stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) - else: - res = res + re.escape(c) - return res + '\Z(?ms)' diff --git a/python/Lib/formatter.py b/python/Lib/formatter.py deleted file mode 100755 index e0a8fe10b2..0000000000 --- a/python/Lib/formatter.py +++ /dev/null @@ -1,445 +0,0 @@ -"""Generic output formatting. - -Formatter objects transform an abstract flow of formatting events into -specific output events on writer objects. Formatters manage several stack -structures to allow various properties of a writer object to be changed and -restored; writers need not be able to handle relative changes nor any sort -of ``change back'' operation. Specific writer properties which may be -controlled via formatter objects are horizontal alignment, font, and left -margin indentations. A mechanism is provided which supports providing -arbitrary, non-exclusive style settings to a writer as well. Additional -interfaces facilitate formatting events which are not reversible, such as -paragraph separation. - -Writer objects encapsulate device interfaces. Abstract devices, such as -file formats, are supported as well as physical devices. The provided -implementations all work with abstract devices. The interface makes -available mechanisms for setting the properties which formatter objects -manage and inserting data into the output. -""" - -import sys - - -AS_IS = None - - -class NullFormatter: - """A formatter which does nothing. - - If the writer parameter is omitted, a NullWriter instance is created. - No methods of the writer are called by NullFormatter instances. 
- - Implementations should inherit from this class if implementing a writer - interface but don't need to inherit any implementation. - - """ - - def __init__(self, writer=None): - if writer is None: - writer = NullWriter() - self.writer = writer - def end_paragraph(self, blankline): pass - def add_line_break(self): pass - def add_hor_rule(self, *args, **kw): pass - def add_label_data(self, format, counter, blankline=None): pass - def add_flowing_data(self, data): pass - def add_literal_data(self, data): pass - def flush_softspace(self): pass - def push_alignment(self, align): pass - def pop_alignment(self): pass - def push_font(self, x): pass - def pop_font(self): pass - def push_margin(self, margin): pass - def pop_margin(self): pass - def set_spacing(self, spacing): pass - def push_style(self, *styles): pass - def pop_style(self, n=1): pass - def assert_line_data(self, flag=1): pass - - -class AbstractFormatter: - """The standard formatter. - - This implementation has demonstrated wide applicability to many writers, - and may be used directly in most circumstances. It has been used to - implement a full-featured World Wide Web browser. - - """ - - # Space handling policy: blank spaces at the boundary between elements - # are handled by the outermost context. "Literal" data is not checked - # to determine context, so spaces in literal data are handled directly - # in all circumstances. - - def __init__(self, writer): - self.writer = writer # Output device - self.align = None # Current alignment - self.align_stack = [] # Alignment stack - self.font_stack = [] # Font state - self.margin_stack = [] # Margin state - self.spacing = None # Vertical spacing state - self.style_stack = [] # Other state, e.g. color - self.nospace = 1 # Should leading space be suppressed - self.softspace = 0 # Should a space be inserted - self.para_end = 1 # Just ended a paragraph - self.parskip = 0 # Skipped space between paragraphs? 
- self.hard_break = 1 # Have a hard break - self.have_label = 0 - - def end_paragraph(self, blankline): - if not self.hard_break: - self.writer.send_line_break() - self.have_label = 0 - if self.parskip < blankline and not self.have_label: - self.writer.send_paragraph(blankline - self.parskip) - self.parskip = blankline - self.have_label = 0 - self.hard_break = self.nospace = self.para_end = 1 - self.softspace = 0 - - def add_line_break(self): - if not (self.hard_break or self.para_end): - self.writer.send_line_break() - self.have_label = self.parskip = 0 - self.hard_break = self.nospace = 1 - self.softspace = 0 - - def add_hor_rule(self, *args, **kw): - if not self.hard_break: - self.writer.send_line_break() - self.writer.send_hor_rule(*args, **kw) - self.hard_break = self.nospace = 1 - self.have_label = self.para_end = self.softspace = self.parskip = 0 - - def add_label_data(self, format, counter, blankline = None): - if self.have_label or not self.hard_break: - self.writer.send_line_break() - if not self.para_end: - self.writer.send_paragraph((blankline and 1) or 0) - if isinstance(format, str): - self.writer.send_label_data(self.format_counter(format, counter)) - else: - self.writer.send_label_data(format) - self.nospace = self.have_label = self.hard_break = self.para_end = 1 - self.softspace = self.parskip = 0 - - def format_counter(self, format, counter): - label = '' - for c in format: - if c == '1': - label = label + ('%d' % counter) - elif c in 'aA': - if counter > 0: - label = label + self.format_letter(c, counter) - elif c in 'iI': - if counter > 0: - label = label + self.format_roman(c, counter) - else: - label = label + c - return label - - def format_letter(self, case, counter): - label = '' - while counter > 0: - counter, x = divmod(counter-1, 26) - # This makes a strong assumption that lowercase letters - # and uppercase letters form two contiguous blocks, with - # letters in order! 
- s = chr(ord(case) + x) - label = s + label - return label - - def format_roman(self, case, counter): - ones = ['i', 'x', 'c', 'm'] - fives = ['v', 'l', 'd'] - label, index = '', 0 - # This will die of IndexError when counter is too big - while counter > 0: - counter, x = divmod(counter, 10) - if x == 9: - label = ones[index] + ones[index+1] + label - elif x == 4: - label = ones[index] + fives[index] + label - else: - if x >= 5: - s = fives[index] - x = x-5 - else: - s = '' - s = s + ones[index]*x - label = s + label - index = index + 1 - if case == 'I': - return label.upper() - return label - - def add_flowing_data(self, data): - if not data: return - prespace = data[:1].isspace() - postspace = data[-1:].isspace() - data = " ".join(data.split()) - if self.nospace and not data: - return - elif prespace or self.softspace: - if not data: - if not self.nospace: - self.softspace = 1 - self.parskip = 0 - return - if not self.nospace: - data = ' ' + data - self.hard_break = self.nospace = self.para_end = \ - self.parskip = self.have_label = 0 - self.softspace = postspace - self.writer.send_flowing_data(data) - - def add_literal_data(self, data): - if not data: return - if self.softspace: - self.writer.send_flowing_data(" ") - self.hard_break = data[-1:] == '\n' - self.nospace = self.para_end = self.softspace = \ - self.parskip = self.have_label = 0 - self.writer.send_literal_data(data) - - def flush_softspace(self): - if self.softspace: - self.hard_break = self.para_end = self.parskip = \ - self.have_label = self.softspace = 0 - self.nospace = 1 - self.writer.send_flowing_data(' ') - - def push_alignment(self, align): - if align and align != self.align: - self.writer.new_alignment(align) - self.align = align - self.align_stack.append(align) - else: - self.align_stack.append(self.align) - - def pop_alignment(self): - if self.align_stack: - del self.align_stack[-1] - if self.align_stack: - self.align = align = self.align_stack[-1] - self.writer.new_alignment(align) - 
else: - self.align = None - self.writer.new_alignment(None) - - def push_font(self, font): - size, i, b, tt = font - if self.softspace: - self.hard_break = self.para_end = self.softspace = 0 - self.nospace = 1 - self.writer.send_flowing_data(' ') - if self.font_stack: - csize, ci, cb, ctt = self.font_stack[-1] - if size is AS_IS: size = csize - if i is AS_IS: i = ci - if b is AS_IS: b = cb - if tt is AS_IS: tt = ctt - font = (size, i, b, tt) - self.font_stack.append(font) - self.writer.new_font(font) - - def pop_font(self): - if self.font_stack: - del self.font_stack[-1] - if self.font_stack: - font = self.font_stack[-1] - else: - font = None - self.writer.new_font(font) - - def push_margin(self, margin): - self.margin_stack.append(margin) - fstack = filter(None, self.margin_stack) - if not margin and fstack: - margin = fstack[-1] - self.writer.new_margin(margin, len(fstack)) - - def pop_margin(self): - if self.margin_stack: - del self.margin_stack[-1] - fstack = filter(None, self.margin_stack) - if fstack: - margin = fstack[-1] - else: - margin = None - self.writer.new_margin(margin, len(fstack)) - - def set_spacing(self, spacing): - self.spacing = spacing - self.writer.new_spacing(spacing) - - def push_style(self, *styles): - if self.softspace: - self.hard_break = self.para_end = self.softspace = 0 - self.nospace = 1 - self.writer.send_flowing_data(' ') - for style in styles: - self.style_stack.append(style) - self.writer.new_styles(tuple(self.style_stack)) - - def pop_style(self, n=1): - del self.style_stack[-n:] - self.writer.new_styles(tuple(self.style_stack)) - - def assert_line_data(self, flag=1): - self.nospace = self.hard_break = not flag - self.para_end = self.parskip = self.have_label = 0 - - -class NullWriter: - """Minimal writer interface to use in testing & inheritance. - - A writer which only provides the interface definition; no actions are - taken on any methods. 
This should be the base class for all writers - which do not need to inherit any implementation methods. - - """ - def __init__(self): pass - def flush(self): pass - def new_alignment(self, align): pass - def new_font(self, font): pass - def new_margin(self, margin, level): pass - def new_spacing(self, spacing): pass - def new_styles(self, styles): pass - def send_paragraph(self, blankline): pass - def send_line_break(self): pass - def send_hor_rule(self, *args, **kw): pass - def send_label_data(self, data): pass - def send_flowing_data(self, data): pass - def send_literal_data(self, data): pass - - -class AbstractWriter(NullWriter): - """A writer which can be used in debugging formatters, but not much else. - - Each method simply announces itself by printing its name and - arguments on standard output. - - """ - - def new_alignment(self, align): - print "new_alignment(%r)" % (align,) - - def new_font(self, font): - print "new_font(%r)" % (font,) - - def new_margin(self, margin, level): - print "new_margin(%r, %d)" % (margin, level) - - def new_spacing(self, spacing): - print "new_spacing(%r)" % (spacing,) - - def new_styles(self, styles): - print "new_styles(%r)" % (styles,) - - def send_paragraph(self, blankline): - print "send_paragraph(%r)" % (blankline,) - - def send_line_break(self): - print "send_line_break()" - - def send_hor_rule(self, *args, **kw): - print "send_hor_rule()" - - def send_label_data(self, data): - print "send_label_data(%r)" % (data,) - - def send_flowing_data(self, data): - print "send_flowing_data(%r)" % (data,) - - def send_literal_data(self, data): - print "send_literal_data(%r)" % (data,) - - -class DumbWriter(NullWriter): - """Simple writer class which writes output on the file object passed in - as the file parameter or, if file is omitted, on standard output. The - output is simply word-wrapped to the number of columns specified by - the maxcol parameter. This class is suitable for reflowing a sequence - of paragraphs. 
- - """ - - def __init__(self, file=None, maxcol=72): - self.file = file or sys.stdout - self.maxcol = maxcol - NullWriter.__init__(self) - self.reset() - - def reset(self): - self.col = 0 - self.atbreak = 0 - - def send_paragraph(self, blankline): - self.file.write('\n'*blankline) - self.col = 0 - self.atbreak = 0 - - def send_line_break(self): - self.file.write('\n') - self.col = 0 - self.atbreak = 0 - - def send_hor_rule(self, *args, **kw): - self.file.write('\n') - self.file.write('-'*self.maxcol) - self.file.write('\n') - self.col = 0 - self.atbreak = 0 - - def send_literal_data(self, data): - self.file.write(data) - i = data.rfind('\n') - if i >= 0: - self.col = 0 - data = data[i+1:] - data = data.expandtabs() - self.col = self.col + len(data) - self.atbreak = 0 - - def send_flowing_data(self, data): - if not data: return - atbreak = self.atbreak or data[0].isspace() - col = self.col - maxcol = self.maxcol - write = self.file.write - for word in data.split(): - if atbreak: - if col + len(word) >= maxcol: - write('\n') - col = 0 - else: - write(' ') - col = col + 1 - write(word) - col = col + len(word) - atbreak = 1 - self.col = col - self.atbreak = data[-1].isspace() - - -def test(file = None): - w = DumbWriter() - f = AbstractFormatter(w) - if file is not None: - fp = open(file) - elif sys.argv[1:]: - fp = open(sys.argv[1]) - else: - fp = sys.stdin - for line in fp: - if line == '\n': - f.end_paragraph(1) - else: - f.add_flowing_data(line) - f.end_paragraph(0) - - -if __name__ == '__main__': - test() diff --git a/python/Lib/fpformat.py b/python/Lib/fpformat.py deleted file mode 100755 index 71cbb25f3c..0000000000 --- a/python/Lib/fpformat.py +++ /dev/null @@ -1,145 +0,0 @@ -"""General floating point formatting functions. - -Functions: -fix(x, digits_behind) -sci(x, digits_behind) - -Each takes a number or a string and a number of digits as arguments. 
- -Parameters: -x: number to be formatted; or a string resembling a number -digits_behind: number of digits behind the decimal point -""" -from warnings import warnpy3k -warnpy3k("the fpformat module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -import re - -__all__ = ["fix","sci","NotANumber"] - -# Compiled regular expression to "decode" a number -decoder = re.compile(r'^([-+]?)0*(\d*)((?:\.\d*)?)(([eE][-+]?\d+)?)$') -# \0 the whole thing -# \1 leading sign or empty -# \2 digits left of decimal point -# \3 fraction (empty or begins with point) -# \4 exponent part (empty or begins with 'e' or 'E') - -try: - class NotANumber(ValueError): - pass -except TypeError: - NotANumber = 'fpformat.NotANumber' - -def extract(s): - """Return (sign, intpart, fraction, expo) or raise an exception: - sign is '+' or '-' - intpart is 0 or more digits beginning with a nonzero - fraction is 0 or more digits - expo is an integer""" - res = decoder.match(s) - if res is None: raise NotANumber, s - sign, intpart, fraction, exppart = res.group(1,2,3,4) - if sign == '+': sign = '' - if fraction: fraction = fraction[1:] - if exppart: expo = int(exppart[1:]) - else: expo = 0 - return sign, intpart, fraction, expo - -def unexpo(intpart, fraction, expo): - """Remove the exponent by changing intpart and fraction.""" - if expo > 0: # Move the point left - f = len(fraction) - intpart, fraction = intpart + fraction[:expo], fraction[expo:] - if expo > f: - intpart = intpart + '0'*(expo-f) - elif expo < 0: # Move the point right - i = len(intpart) - intpart, fraction = intpart[:expo], intpart[expo:] + fraction - if expo < -i: - fraction = '0'*(-expo-i) + fraction - return intpart, fraction - -def roundfrac(intpart, fraction, digs): - """Round or extend the fraction to size digs.""" - f = len(fraction) - if f <= digs: - return intpart, fraction + '0'*(digs-f) - i = len(intpart) - if i+digs < 0: - return '0'*-digs, '' - total = intpart + fraction - nextdigit = total[i+digs] - if 
nextdigit >= '5': # Hard case: increment last digit, may have carry! - n = i + digs - 1 - while n >= 0: - if total[n] != '9': break - n = n-1 - else: - total = '0' + total - i = i+1 - n = 0 - total = total[:n] + chr(ord(total[n]) + 1) + '0'*(len(total)-n-1) - intpart, fraction = total[:i], total[i:] - if digs >= 0: - return intpart, fraction[:digs] - else: - return intpart[:digs] + '0'*-digs, '' - -def fix(x, digs): - """Format x as [-]ddd.ddd with 'digs' digits after the point - and at least one digit before. - If digs <= 0, the point is suppressed.""" - if type(x) != type(''): x = repr(x) - try: - sign, intpart, fraction, expo = extract(x) - except NotANumber: - return x - intpart, fraction = unexpo(intpart, fraction, expo) - intpart, fraction = roundfrac(intpart, fraction, digs) - while intpart and intpart[0] == '0': intpart = intpart[1:] - if intpart == '': intpart = '0' - if digs > 0: return sign + intpart + '.' + fraction - else: return sign + intpart - -def sci(x, digs): - """Format x as [-]d.dddE[+-]ddd with 'digs' digits after the point - and exactly one digit before. - If digs is <= 0, one digit is kept and the point is suppressed.""" - if type(x) != type(''): x = repr(x) - sign, intpart, fraction, expo = extract(x) - if not intpart: - while fraction and fraction[0] == '0': - fraction = fraction[1:] - expo = expo - 1 - if fraction: - intpart, fraction = fraction[0], fraction[1:] - expo = expo - 1 - else: - intpart = '0' - else: - expo = expo + len(intpart) - 1 - intpart, fraction = intpart[0], intpart[1:] + fraction - digs = max(0, digs) - intpart, fraction = roundfrac(intpart, fraction, digs) - if len(intpart) > 1: - intpart, fraction, expo = \ - intpart[0], intpart[1:] + fraction[:-1], \ - expo + len(intpart) - 1 - s = sign + intpart - if digs > 0: s = s + '.' 
+ fraction - e = repr(abs(expo)) - e = '0'*(3-len(e)) + e - if expo < 0: e = '-' + e - else: e = '+' + e - return s + 'e' + e - -def test(): - """Interactive test run.""" - try: - while 1: - x, digs = input('Enter (x, digs): ') - print x, fix(x, digs), sci(x, digs) - except (EOFError, KeyboardInterrupt): - pass diff --git a/python/Lib/fractions.py b/python/Lib/fractions.py deleted file mode 100755 index a0d86a4393..0000000000 --- a/python/Lib/fractions.py +++ /dev/null @@ -1,605 +0,0 @@ -# Originally contributed by Sjoerd Mullender. -# Significantly modified by Jeffrey Yasskin . - -"""Rational, infinite-precision, real numbers.""" - -from __future__ import division -from decimal import Decimal -import math -import numbers -import operator -import re - -__all__ = ['Fraction', 'gcd'] - -Rational = numbers.Rational - - -def gcd(a, b): - """Calculate the Greatest Common Divisor of a and b. - - Unless b==0, the result will have the same sign as b (so that when - b is divided by it, the result comes out positive). - """ - while b: - a, b = b, a%b - return a - - -_RATIONAL_FORMAT = re.compile(r""" - \A\s* # optional whitespace at the start, then - (?P[-+]?) # an optional sign, then - (?=\d|\.\d) # lookahead for digit or .digit - (?P\d*) # numerator (possibly empty) - (?: # followed by - (?:/(?P\d+))? # an optional denominator - | # or - (?:\.(?P\d*))? # an optional fractional part - (?:E(?P[-+]?\d+))? # and optional exponent - ) - \s*\Z # and optional whitespace to finish -""", re.VERBOSE | re.IGNORECASE) - - -class Fraction(Rational): - """This class implements rational numbers. - - In the two-argument form of the constructor, Fraction(8, 6) will - produce a rational number equivalent to 4/3. Both arguments must - be Rational. The numerator defaults to 0 and the denominator - defaults to 1 so that Fraction(3) == 3 and Fraction() == 0. 
- - Fractions can also be constructed from: - - - numeric strings similar to those accepted by the - float constructor (for example, '-2.3' or '1e10') - - - strings of the form '123/456' - - - float and Decimal instances - - - other Rational instances (including integers) - - """ - - __slots__ = ('_numerator', '_denominator') - - # We're immutable, so use __new__ not __init__ - def __new__(cls, numerator=0, denominator=None): - """Constructs a Fraction. - - Takes a string like '3/2' or '1.5', another Rational instance, a - numerator/denominator pair, or a float. - - Examples - -------- - - >>> Fraction(10, -8) - Fraction(-5, 4) - >>> Fraction(Fraction(1, 7), 5) - Fraction(1, 35) - >>> Fraction(Fraction(1, 7), Fraction(2, 3)) - Fraction(3, 14) - >>> Fraction('314') - Fraction(314, 1) - >>> Fraction('-35/4') - Fraction(-35, 4) - >>> Fraction('3.1415') # conversion from numeric string - Fraction(6283, 2000) - >>> Fraction('-47e-2') # string may include a decimal exponent - Fraction(-47, 100) - >>> Fraction(1.47) # direct construction from float (exact conversion) - Fraction(6620291452234629, 4503599627370496) - >>> Fraction(2.25) - Fraction(9, 4) - >>> Fraction(Decimal('1.47')) - Fraction(147, 100) - - """ - self = super(Fraction, cls).__new__(cls) - - if denominator is None: - if isinstance(numerator, Rational): - self._numerator = numerator.numerator - self._denominator = numerator.denominator - return self - - elif isinstance(numerator, float): - # Exact conversion from float - value = Fraction.from_float(numerator) - self._numerator = value._numerator - self._denominator = value._denominator - return self - - elif isinstance(numerator, Decimal): - value = Fraction.from_decimal(numerator) - self._numerator = value._numerator - self._denominator = value._denominator - return self - - elif isinstance(numerator, basestring): - # Handle construction from strings. 
- m = _RATIONAL_FORMAT.match(numerator) - if m is None: - raise ValueError('Invalid literal for Fraction: %r' % - numerator) - numerator = int(m.group('num') or '0') - denom = m.group('denom') - if denom: - denominator = int(denom) - else: - denominator = 1 - decimal = m.group('decimal') - if decimal: - scale = 10**len(decimal) - numerator = numerator * scale + int(decimal) - denominator *= scale - exp = m.group('exp') - if exp: - exp = int(exp) - if exp >= 0: - numerator *= 10**exp - else: - denominator *= 10**-exp - if m.group('sign') == '-': - numerator = -numerator - - else: - raise TypeError("argument should be a string " - "or a Rational instance") - - elif (isinstance(numerator, Rational) and - isinstance(denominator, Rational)): - numerator, denominator = ( - numerator.numerator * denominator.denominator, - denominator.numerator * numerator.denominator - ) - else: - raise TypeError("both arguments should be " - "Rational instances") - - if denominator == 0: - raise ZeroDivisionError('Fraction(%s, 0)' % numerator) - g = gcd(numerator, denominator) - self._numerator = numerator // g - self._denominator = denominator // g - return self - - @classmethod - def from_float(cls, f): - """Converts a finite float to a rational number, exactly. - - Beware that Fraction.from_float(0.3) != Fraction(3, 10). - - """ - if isinstance(f, numbers.Integral): - return cls(f) - elif not isinstance(f, float): - raise TypeError("%s.from_float() only takes floats, not %r (%s)" % - (cls.__name__, f, type(f).__name__)) - if math.isnan(f) or math.isinf(f): - raise TypeError("Cannot convert %r to %s." 
% (f, cls.__name__)) - return cls(*f.as_integer_ratio()) - - @classmethod - def from_decimal(cls, dec): - """Converts a finite Decimal instance to a rational number, exactly.""" - from decimal import Decimal - if isinstance(dec, numbers.Integral): - dec = Decimal(int(dec)) - elif not isinstance(dec, Decimal): - raise TypeError( - "%s.from_decimal() only takes Decimals, not %r (%s)" % - (cls.__name__, dec, type(dec).__name__)) - if not dec.is_finite(): - # Catches infinities and nans. - raise TypeError("Cannot convert %s to %s." % (dec, cls.__name__)) - sign, digits, exp = dec.as_tuple() - digits = int(''.join(map(str, digits))) - if sign: - digits = -digits - if exp >= 0: - return cls(digits * 10 ** exp) - else: - return cls(digits, 10 ** -exp) - - def limit_denominator(self, max_denominator=1000000): - """Closest Fraction to self with denominator at most max_denominator. - - >>> Fraction('3.141592653589793').limit_denominator(10) - Fraction(22, 7) - >>> Fraction('3.141592653589793').limit_denominator(100) - Fraction(311, 99) - >>> Fraction(4321, 8765).limit_denominator(10000) - Fraction(4321, 8765) - - """ - # Algorithm notes: For any real number x, define a *best upper - # approximation* to x to be a rational number p/q such that: - # - # (1) p/q >= x, and - # (2) if p/q > r/s >= x then s > q, for any rational r/s. - # - # Define *best lower approximation* similarly. Then it can be - # proved that a rational number is a best upper or lower - # approximation to x if, and only if, it is a convergent or - # semiconvergent of the (unique shortest) continued fraction - # associated to x. - # - # To find a best rational approximation with denominator <= M, - # we find the best upper and lower approximations with - # denominator <= M and take whichever of these is closer to x. - # In the event of a tie, the bound with smaller denominator is - # chosen. 
If both denominators are equal (which can happen - # only when max_denominator == 1 and self is midway between - # two integers) the lower bound---i.e., the floor of self, is - # taken. - - if max_denominator < 1: - raise ValueError("max_denominator should be at least 1") - if self._denominator <= max_denominator: - return Fraction(self) - - p0, q0, p1, q1 = 0, 1, 1, 0 - n, d = self._numerator, self._denominator - while True: - a = n//d - q2 = q0+a*q1 - if q2 > max_denominator: - break - p0, q0, p1, q1 = p1, q1, p0+a*p1, q2 - n, d = d, n-a*d - - k = (max_denominator-q0)//q1 - bound1 = Fraction(p0+k*p1, q0+k*q1) - bound2 = Fraction(p1, q1) - if abs(bound2 - self) <= abs(bound1-self): - return bound2 - else: - return bound1 - - @property - def numerator(a): - return a._numerator - - @property - def denominator(a): - return a._denominator - - def __repr__(self): - """repr(self)""" - return ('Fraction(%s, %s)' % (self._numerator, self._denominator)) - - def __str__(self): - """str(self)""" - if self._denominator == 1: - return str(self._numerator) - else: - return '%s/%s' % (self._numerator, self._denominator) - - def _operator_fallbacks(monomorphic_operator, fallback_operator): - """Generates forward and reverse operators given a purely-rational - operator and a function from the operator module. - - Use this like: - __op__, __rop__ = _operator_fallbacks(just_rational_op, operator.op) - - In general, we want to implement the arithmetic operations so - that mixed-mode operations either call an implementation whose - author knew about the types of both arguments, or convert both - to the nearest built in type and do the operation there. 
In - Fraction, that means that we define __add__ and __radd__ as: - - def __add__(self, other): - # Both types have numerators/denominator attributes, - # so do the operation directly - if isinstance(other, (int, long, Fraction)): - return Fraction(self.numerator * other.denominator + - other.numerator * self.denominator, - self.denominator * other.denominator) - # float and complex don't have those operations, but we - # know about those types, so special case them. - elif isinstance(other, float): - return float(self) + other - elif isinstance(other, complex): - return complex(self) + other - # Let the other type take over. - return NotImplemented - - def __radd__(self, other): - # radd handles more types than add because there's - # nothing left to fall back to. - if isinstance(other, Rational): - return Fraction(self.numerator * other.denominator + - other.numerator * self.denominator, - self.denominator * other.denominator) - elif isinstance(other, Real): - return float(other) + float(self) - elif isinstance(other, Complex): - return complex(other) + complex(self) - return NotImplemented - - - There are 5 different cases for a mixed-type addition on - Fraction. I'll refer to all of the above code that doesn't - refer to Fraction, float, or complex as "boilerplate". 'r' - will be an instance of Fraction, which is a subtype of - Rational (r : Fraction <: Rational), and b : B <: - Complex. The first three involve 'r + b': - - 1. If B <: Fraction, int, float, or complex, we handle - that specially, and all is well. - 2. If Fraction falls back to the boilerplate code, and it - were to return a value from __add__, we'd miss the - possibility that B defines a more intelligent __radd__, - so the boilerplate should return NotImplemented from - __add__. In particular, we don't handle Rational - here, even though we could get an exact answer, in case - the other type wants to do something special. - 3. If B <: Fraction, Python tries B.__radd__ before - Fraction.__add__. 
This is ok, because it was - implemented with knowledge of Fraction, so it can - handle those instances before delegating to Real or - Complex. - - The next two situations describe 'b + r'. We assume that b - didn't know about Fraction in its implementation, and that it - uses similar boilerplate code: - - 4. If B <: Rational, then __radd_ converts both to the - builtin rational type (hey look, that's us) and - proceeds. - 5. Otherwise, __radd__ tries to find the nearest common - base ABC, and fall back to its builtin type. Since this - class doesn't subclass a concrete type, there's no - implementation to fall back to, so we need to try as - hard as possible to return an actual value, or the user - will get a TypeError. - - """ - def forward(a, b): - if isinstance(b, (int, long, Fraction)): - return monomorphic_operator(a, b) - elif isinstance(b, float): - return fallback_operator(float(a), b) - elif isinstance(b, complex): - return fallback_operator(complex(a), b) - else: - return NotImplemented - forward.__name__ = '__' + fallback_operator.__name__ + '__' - forward.__doc__ = monomorphic_operator.__doc__ - - def reverse(b, a): - if isinstance(a, Rational): - # Includes ints. 
- return monomorphic_operator(a, b) - elif isinstance(a, numbers.Real): - return fallback_operator(float(a), float(b)) - elif isinstance(a, numbers.Complex): - return fallback_operator(complex(a), complex(b)) - else: - return NotImplemented - reverse.__name__ = '__r' + fallback_operator.__name__ + '__' - reverse.__doc__ = monomorphic_operator.__doc__ - - return forward, reverse - - def _add(a, b): - """a + b""" - return Fraction(a.numerator * b.denominator + - b.numerator * a.denominator, - a.denominator * b.denominator) - - __add__, __radd__ = _operator_fallbacks(_add, operator.add) - - def _sub(a, b): - """a - b""" - return Fraction(a.numerator * b.denominator - - b.numerator * a.denominator, - a.denominator * b.denominator) - - __sub__, __rsub__ = _operator_fallbacks(_sub, operator.sub) - - def _mul(a, b): - """a * b""" - return Fraction(a.numerator * b.numerator, a.denominator * b.denominator) - - __mul__, __rmul__ = _operator_fallbacks(_mul, operator.mul) - - def _div(a, b): - """a / b""" - return Fraction(a.numerator * b.denominator, - a.denominator * b.numerator) - - __truediv__, __rtruediv__ = _operator_fallbacks(_div, operator.truediv) - __div__, __rdiv__ = _operator_fallbacks(_div, operator.div) - - def __floordiv__(a, b): - """a // b""" - # Will be math.floor(a / b) in 3.0. - div = a / b - if isinstance(div, Rational): - # trunc(math.floor(div)) doesn't work if the rational is - # more precise than a float because the intermediate - # rounding may cross an integer boundary. - return div.numerator // div.denominator - else: - return math.floor(div) - - def __rfloordiv__(b, a): - """a // b""" - # Will be math.floor(a / b) in 3.0. - div = a / b - if isinstance(div, Rational): - # trunc(math.floor(div)) doesn't work if the rational is - # more precise than a float because the intermediate - # rounding may cross an integer boundary. 
- return div.numerator // div.denominator - else: - return math.floor(div) - - def __mod__(a, b): - """a % b""" - div = a // b - return a - b * div - - def __rmod__(b, a): - """a % b""" - div = a // b - return a - b * div - - def __pow__(a, b): - """a ** b - - If b is not an integer, the result will be a float or complex - since roots are generally irrational. If b is an integer, the - result will be rational. - - """ - if isinstance(b, Rational): - if b.denominator == 1: - power = b.numerator - if power >= 0: - return Fraction(a._numerator ** power, - a._denominator ** power) - else: - return Fraction(a._denominator ** -power, - a._numerator ** -power) - else: - # A fractional power will generally produce an - # irrational number. - return float(a) ** float(b) - else: - return float(a) ** b - - def __rpow__(b, a): - """a ** b""" - if b._denominator == 1 and b._numerator >= 0: - # If a is an int, keep it that way if possible. - return a ** b._numerator - - if isinstance(a, Rational): - return Fraction(a.numerator, a.denominator) ** b - - if b._denominator == 1: - return a ** b._numerator - - return a ** float(b) - - def __pos__(a): - """+a: Coerces a subclass instance to Fraction""" - return Fraction(a._numerator, a._denominator) - - def __neg__(a): - """-a""" - return Fraction(-a._numerator, a._denominator) - - def __abs__(a): - """abs(a)""" - return Fraction(abs(a._numerator), a._denominator) - - def __trunc__(a): - """trunc(a)""" - if a._numerator < 0: - return -(-a._numerator // a._denominator) - else: - return a._numerator // a._denominator - - def __hash__(self): - """hash(self) - - Tricky because values that are exactly representable as a - float must have the same hash as that float. - - """ - # XXX since this method is expensive, consider caching the result - if self._denominator == 1: - # Get integers right. - return hash(self._numerator) - # Expensive check, but definitely correct. 
- if self == float(self): - return hash(float(self)) - else: - # Use tuple's hash to avoid a high collision rate on - # simple fractions. - return hash((self._numerator, self._denominator)) - - def __eq__(a, b): - """a == b""" - if isinstance(b, Rational): - return (a._numerator == b.numerator and - a._denominator == b.denominator) - if isinstance(b, numbers.Complex) and b.imag == 0: - b = b.real - if isinstance(b, float): - if math.isnan(b) or math.isinf(b): - # comparisons with an infinity or nan should behave in - # the same way for any finite a, so treat a as zero. - return 0.0 == b - else: - return a == a.from_float(b) - else: - # Since a doesn't know how to compare with b, let's give b - # a chance to compare itself with a. - return NotImplemented - - def _richcmp(self, other, op): - """Helper for comparison operators, for internal use only. - - Implement comparison between a Rational instance `self`, and - either another Rational instance or a float `other`. If - `other` is not a Rational instance or a float, return - NotImplemented. `op` should be one of the six standard - comparison operators. - - """ - # convert other to a Rational instance where reasonable. - if isinstance(other, Rational): - return op(self._numerator * other.denominator, - self._denominator * other.numerator) - # comparisons with complex should raise a TypeError, for consistency - # with int<->complex, float<->complex, and complex<->complex comparisons. 
- if isinstance(other, complex): - raise TypeError("no ordering relation is defined for complex numbers") - if isinstance(other, float): - if math.isnan(other) or math.isinf(other): - return op(0.0, other) - else: - return op(self, self.from_float(other)) - else: - return NotImplemented - - def __lt__(a, b): - """a < b""" - return a._richcmp(b, operator.lt) - - def __gt__(a, b): - """a > b""" - return a._richcmp(b, operator.gt) - - def __le__(a, b): - """a <= b""" - return a._richcmp(b, operator.le) - - def __ge__(a, b): - """a >= b""" - return a._richcmp(b, operator.ge) - - def __nonzero__(a): - """a != 0""" - return a._numerator != 0 - - # support for pickling, copy, and deepcopy - - def __reduce__(self): - return (self.__class__, (str(self),)) - - def __copy__(self): - if type(self) == Fraction: - return self # I'm immutable; therefore I am my own clone - return self.__class__(self._numerator, self._denominator) - - def __deepcopy__(self, memo): - if type(self) == Fraction: - return self # My components are also immutable - return self.__class__(self._numerator, self._denominator) diff --git a/python/Lib/ftplib.py b/python/Lib/ftplib.py deleted file mode 100755 index 09986fc3d4..0000000000 --- a/python/Lib/ftplib.py +++ /dev/null @@ -1,1078 +0,0 @@ -"""An FTP client class and some helper functions. - -Based on RFC 959: File Transfer Protocol (FTP), by J. Postel and J. Reynolds - -Example: - ->>> from ftplib import FTP ->>> ftp = FTP('ftp.python.org') # connect to host, default port ->>> ftp.login() # default, i.e.: user anonymous, passwd anonymous@ -'230 Guest login ok, access restrictions apply.' ->>> ftp.retrlines('LIST') # list directory contents -total 9 -drwxr-xr-x 8 root wheel 1024 Jan 3 1994 . -drwxr-xr-x 8 root wheel 1024 Jan 3 1994 .. 
-drwxr-xr-x 2 root wheel 1024 Jan 3 1994 bin -drwxr-xr-x 2 root wheel 1024 Jan 3 1994 etc -d-wxrwxr-x 2 ftp wheel 1024 Sep 5 13:43 incoming -drwxr-xr-x 2 root wheel 1024 Nov 17 1993 lib -drwxr-xr-x 6 1094 wheel 1024 Sep 13 19:07 pub -drwxr-xr-x 3 root wheel 1024 Jan 3 1994 usr --rw-r--r-- 1 root root 312 Aug 1 1994 welcome.msg -'226 Transfer complete.' ->>> ftp.quit() -'221 Goodbye.' ->>> - -A nice test that reveals some of the network dialogue would be: -python ftplib.py -d localhost -l -p -l -""" - -# -# Changes and improvements suggested by Steve Majewski. -# Modified by Jack to work on the mac. -# Modified by Siebren to support docstrings and PASV. -# Modified by Phil Schwartz to add storbinary and storlines callbacks. -# Modified by Giampaolo Rodola' to add TLS support. -# - -import os -import sys - -# Import SOCKS module if it exists, else standard socket module socket -try: - import SOCKS; socket = SOCKS; del SOCKS # import SOCKS as socket - from socket import getfqdn; socket.getfqdn = getfqdn; del getfqdn -except ImportError: - import socket -from socket import _GLOBAL_DEFAULT_TIMEOUT - -__all__ = ["FTP","Netrc"] - -# Magic number from -MSG_OOB = 0x1 # Process data out of band - - -# The standard FTP server control port -FTP_PORT = 21 -# The sizehint parameter passed to readline() calls -MAXLINE = 8192 - - -# Exception raised when an error or invalid response is received -class Error(Exception): pass -class error_reply(Error): pass # unexpected [123]xx reply -class error_temp(Error): pass # 4xx errors -class error_perm(Error): pass # 5xx errors -class error_proto(Error): pass # response does not begin with [1-5] - - -# All exceptions (hopefully) that may be raised here and that aren't -# (always) programming errors on our side -all_errors = (Error, IOError, EOFError) - - -# Line terminators (we always output CRLF, but accept any of CRLF, CR, LF) -CRLF = '\r\n' - -# The class itself -class FTP: - - '''An FTP client class. 
- - To create a connection, call the class using these arguments: - host, user, passwd, acct, timeout - - The first four arguments are all strings, and have default value ''. - timeout must be numeric and defaults to None if not passed, - meaning that no timeout will be set on any ftp socket(s) - If a timeout is passed, then this is now the default timeout for all ftp - socket operations for this instance. - - Then use self.connect() with optional host and port argument. - - To download a file, use ftp.retrlines('RETR ' + filename), - or ftp.retrbinary() with slightly different arguments. - To upload a file, use ftp.storlines() or ftp.storbinary(), - which have an open file as argument (see their definitions - below for details). - The download/upload functions first issue appropriate TYPE - and PORT or PASV commands. -''' - - debugging = 0 - host = '' - port = FTP_PORT - maxline = MAXLINE - sock = None - file = None - welcome = None - passiveserver = 1 - - # Initialization method (called by class instantiation). - # Initialize host to localhost, port to standard ftp port - # Optional arguments are host (for connect()), - # and user, passwd, acct (for login()) - def __init__(self, host='', user='', passwd='', acct='', - timeout=_GLOBAL_DEFAULT_TIMEOUT): - self.timeout = timeout - if host: - self.connect(host) - if user: - self.login(user, passwd, acct) - - def connect(self, host='', port=0, timeout=-999): - '''Connect to host. Arguments are: - - host: hostname to connect to (string, default previous host) - - port: port to connect to (integer, default previous port) - ''' - if host != '': - self.host = host - if port > 0: - self.port = port - if timeout != -999: - self.timeout = timeout - self.sock = socket.create_connection((self.host, self.port), self.timeout) - self.af = self.sock.family - self.file = self.sock.makefile('rb') - self.welcome = self.getresp() - return self.welcome - - def getwelcome(self): - '''Get the welcome message from the server. 
- (this is read and squirreled away by connect())''' - if self.debugging: - print '*welcome*', self.sanitize(self.welcome) - return self.welcome - - def set_debuglevel(self, level): - '''Set the debugging level. - The required argument level means: - 0: no debugging output (default) - 1: print commands and responses but not body text etc. - 2: also print raw lines read and sent before stripping CR/LF''' - self.debugging = level - debug = set_debuglevel - - def set_pasv(self, val): - '''Use passive or active mode for data transfers. - With a false argument, use the normal PORT mode, - With a true argument, use the PASV command.''' - self.passiveserver = val - - # Internal: "sanitize" a string for printing - def sanitize(self, s): - if s[:5] == 'pass ' or s[:5] == 'PASS ': - i = len(s) - while i > 5 and s[i-1] in '\r\n': - i = i-1 - s = s[:5] + '*'*(i-5) + s[i:] - return repr(s) - - # Internal: send one line to the server, appending CRLF - def putline(self, line): - line = line + CRLF - if self.debugging > 1: print '*put*', self.sanitize(line) - self.sock.sendall(line) - - # Internal: send one command to the server (through putline()) - def putcmd(self, line): - if self.debugging: print '*cmd*', self.sanitize(line) - self.putline(line) - - # Internal: return one line from the server, stripping CRLF. - # Raise EOFError if the connection is closed - def getline(self): - line = self.file.readline(self.maxline + 1) - if len(line) > self.maxline: - raise Error("got more than %d bytes" % self.maxline) - if self.debugging > 1: - print '*get*', self.sanitize(line) - if not line: raise EOFError - if line[-2:] == CRLF: line = line[:-2] - elif line[-1:] in CRLF: line = line[:-1] - return line - - # Internal: get a response from the server, which may possibly - # consist of multiple lines. Return a single string with no - # trailing CRLF. 
If the response consists of multiple lines, - # these are separated by '\n' characters in the string - def getmultiline(self): - line = self.getline() - if line[3:4] == '-': - code = line[:3] - while 1: - nextline = self.getline() - line = line + ('\n' + nextline) - if nextline[:3] == code and \ - nextline[3:4] != '-': - break - return line - - # Internal: get a response from the server. - # Raise various errors if the response indicates an error - def getresp(self): - resp = self.getmultiline() - if self.debugging: print '*resp*', self.sanitize(resp) - self.lastresp = resp[:3] - c = resp[:1] - if c in ('1', '2', '3'): - return resp - if c == '4': - raise error_temp, resp - if c == '5': - raise error_perm, resp - raise error_proto, resp - - def voidresp(self): - """Expect a response beginning with '2'.""" - resp = self.getresp() - if resp[:1] != '2': - raise error_reply, resp - return resp - - def abort(self): - '''Abort a file transfer. Uses out-of-band data. - This does not follow the procedure from the RFC to send Telnet - IP and Synch; that doesn't seem to work with the servers I've - tried. Instead, just send the ABOR command as OOB data.''' - line = 'ABOR' + CRLF - if self.debugging > 1: print '*put urgent*', self.sanitize(line) - self.sock.sendall(line, MSG_OOB) - resp = self.getmultiline() - if resp[:3] not in ('426', '225', '226'): - raise error_proto, resp - - def sendcmd(self, cmd): - '''Send a command and return the response.''' - self.putcmd(cmd) - return self.getresp() - - def voidcmd(self, cmd): - """Send a command and expect a response beginning with '2'.""" - self.putcmd(cmd) - return self.voidresp() - - def sendport(self, host, port): - '''Send a PORT command with the current host and the given - port number. 
- ''' - hbytes = host.split('.') - pbytes = [repr(port//256), repr(port%256)] - bytes = hbytes + pbytes - cmd = 'PORT ' + ','.join(bytes) - return self.voidcmd(cmd) - - def sendeprt(self, host, port): - '''Send an EPRT command with the current host and the given port number.''' - af = 0 - if self.af == socket.AF_INET: - af = 1 - if self.af == socket.AF_INET6: - af = 2 - if af == 0: - raise error_proto, 'unsupported address family' - fields = ['', repr(af), host, repr(port), ''] - cmd = 'EPRT ' + '|'.join(fields) - return self.voidcmd(cmd) - - def makeport(self): - '''Create a new socket and send a PORT command for it.''' - err = None - sock = None - for res in socket.getaddrinfo(None, 0, self.af, socket.SOCK_STREAM, 0, socket.AI_PASSIVE): - af, socktype, proto, canonname, sa = res - try: - sock = socket.socket(af, socktype, proto) - sock.bind(sa) - except socket.error, err: - if sock: - sock.close() - sock = None - continue - break - if sock is None: - if err is not None: - raise err - else: - raise socket.error("getaddrinfo returns an empty list") - sock.listen(1) - port = sock.getsockname()[1] # Get proper port - host = self.sock.getsockname()[0] # Get proper host - if self.af == socket.AF_INET: - resp = self.sendport(host, port) - else: - resp = self.sendeprt(host, port) - if self.timeout is not _GLOBAL_DEFAULT_TIMEOUT: - sock.settimeout(self.timeout) - return sock - - def makepasv(self): - if self.af == socket.AF_INET: - host, port = parse227(self.sendcmd('PASV')) - else: - host, port = parse229(self.sendcmd('EPSV'), self.sock.getpeername()) - return host, port - - def ntransfercmd(self, cmd, rest=None): - """Initiate a transfer over the data connection. - - If the transfer is active, send a port command and the - transfer command, and accept the connection. If the server is - passive, send a pasv command, connect to it, and start the - transfer command. Either way, return the socket for the - connection and the expected size of the transfer. 
The - expected size may be None if it could not be determined. - - Optional `rest' argument can be a string that is sent as the - argument to a REST command. This is essentially a server - marker used to tell the server to skip over any data up to the - given marker. - """ - size = None - if self.passiveserver: - host, port = self.makepasv() - conn = socket.create_connection((host, port), self.timeout) - try: - if rest is not None: - self.sendcmd("REST %s" % rest) - resp = self.sendcmd(cmd) - # Some servers apparently send a 200 reply to - # a LIST or STOR command, before the 150 reply - # (and way before the 226 reply). This seems to - # be in violation of the protocol (which only allows - # 1xx or error messages for LIST), so we just discard - # this response. - if resp[0] == '2': - resp = self.getresp() - if resp[0] != '1': - raise error_reply, resp - except: - conn.close() - raise - else: - sock = self.makeport() - try: - if rest is not None: - self.sendcmd("REST %s" % rest) - resp = self.sendcmd(cmd) - # See above. 
- if resp[0] == '2': - resp = self.getresp() - if resp[0] != '1': - raise error_reply, resp - conn, sockaddr = sock.accept() - if self.timeout is not _GLOBAL_DEFAULT_TIMEOUT: - conn.settimeout(self.timeout) - finally: - sock.close() - if resp[:3] == '150': - # this is conditional in case we received a 125 - size = parse150(resp) - return conn, size - - def transfercmd(self, cmd, rest=None): - """Like ntransfercmd() but returns only the socket.""" - return self.ntransfercmd(cmd, rest)[0] - - def login(self, user = '', passwd = '', acct = ''): - '''Login, default anonymous.''' - if not user: user = 'anonymous' - if not passwd: passwd = '' - if not acct: acct = '' - if user == 'anonymous' and passwd in ('', '-'): - # If there is no anonymous ftp password specified - # then we'll just use anonymous@ - # We don't send any other thing because: - # - We want to remain anonymous - # - We want to stop SPAM - # - We don't want to let ftp sites to discriminate by the user, - # host or country. - passwd = passwd + 'anonymous@' - resp = self.sendcmd('USER ' + user) - if resp[0] == '3': resp = self.sendcmd('PASS ' + passwd) - if resp[0] == '3': resp = self.sendcmd('ACCT ' + acct) - if resp[0] != '2': - raise error_reply, resp - return resp - - def retrbinary(self, cmd, callback, blocksize=8192, rest=None): - """Retrieve data in binary mode. A new port is created for you. - - Args: - cmd: A RETR command. - callback: A single parameter callable to be called on each - block of data read. - blocksize: The maximum number of bytes to read from the - socket at one time. [default: 8192] - rest: Passed to transfercmd(). [default: None] - - Returns: - The response code. - """ - self.voidcmd('TYPE I') - conn = self.transfercmd(cmd, rest) - while 1: - data = conn.recv(blocksize) - if not data: - break - callback(data) - conn.close() - return self.voidresp() - - def retrlines(self, cmd, callback = None): - """Retrieve data in line mode. A new port is created for you. 
- - Args: - cmd: A RETR, LIST, NLST, or MLSD command. - callback: An optional single parameter callable that is called - for each line with the trailing CRLF stripped. - [default: print_line()] - - Returns: - The response code. - """ - if callback is None: callback = print_line - resp = self.sendcmd('TYPE A') - conn = self.transfercmd(cmd) - fp = conn.makefile('rb') - while 1: - line = fp.readline(self.maxline + 1) - if len(line) > self.maxline: - raise Error("got more than %d bytes" % self.maxline) - if self.debugging > 2: print '*retr*', repr(line) - if not line: - break - if line[-2:] == CRLF: - line = line[:-2] - elif line[-1:] == '\n': - line = line[:-1] - callback(line) - fp.close() - conn.close() - return self.voidresp() - - def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None): - """Store a file in binary mode. A new port is created for you. - - Args: - cmd: A STOR command. - fp: A file-like object with a read(num_bytes) method. - blocksize: The maximum data size to read from fp and send over - the connection at once. [default: 8192] - callback: An optional single parameter callable that is called on - each block of data after it is sent. [default: None] - rest: Passed to transfercmd(). [default: None] - - Returns: - The response code. - """ - self.voidcmd('TYPE I') - conn = self.transfercmd(cmd, rest) - while 1: - buf = fp.read(blocksize) - if not buf: break - conn.sendall(buf) - if callback: callback(buf) - conn.close() - return self.voidresp() - - def storlines(self, cmd, fp, callback=None): - """Store a file in line mode. A new port is created for you. - - Args: - cmd: A STOR command. - fp: A file-like object with a readline() method. - callback: An optional single parameter callable that is called on - each line after it is sent. [default: None] - - Returns: - The response code. 
- """ - self.voidcmd('TYPE A') - conn = self.transfercmd(cmd) - while 1: - buf = fp.readline(self.maxline + 1) - if len(buf) > self.maxline: - raise Error("got more than %d bytes" % self.maxline) - if not buf: break - if buf[-2:] != CRLF: - if buf[-1] in CRLF: buf = buf[:-1] - buf = buf + CRLF - conn.sendall(buf) - if callback: callback(buf) - conn.close() - return self.voidresp() - - def acct(self, password): - '''Send new account name.''' - cmd = 'ACCT ' + password - return self.voidcmd(cmd) - - def nlst(self, *args): - '''Return a list of files in a given directory (default the current).''' - cmd = 'NLST' - for arg in args: - cmd = cmd + (' ' + arg) - files = [] - self.retrlines(cmd, files.append) - return files - - def dir(self, *args): - '''List a directory in long form. - By default list current directory to stdout. - Optional last argument is callback function; all - non-empty arguments before it are concatenated to the - LIST command. (This *should* only be used for a pathname.)''' - cmd = 'LIST' - func = None - if args[-1:] and type(args[-1]) != type(''): - args, func = args[:-1], args[-1] - for arg in args: - if arg: - cmd = cmd + (' ' + arg) - self.retrlines(cmd, func) - - def rename(self, fromname, toname): - '''Rename a file.''' - resp = self.sendcmd('RNFR ' + fromname) - if resp[0] != '3': - raise error_reply, resp - return self.voidcmd('RNTO ' + toname) - - def delete(self, filename): - '''Delete a file.''' - resp = self.sendcmd('DELE ' + filename) - if resp[:3] in ('250', '200'): - return resp - else: - raise error_reply, resp - - def cwd(self, dirname): - '''Change to a directory.''' - if dirname == '..': - try: - return self.voidcmd('CDUP') - except error_perm, msg: - if msg.args[0][:3] != '500': - raise - elif dirname == '': - dirname = '.' 
# does nothing, but could return error - cmd = 'CWD ' + dirname - return self.voidcmd(cmd) - - def size(self, filename): - '''Retrieve the size of a file.''' - # The SIZE command is defined in RFC-3659 - resp = self.sendcmd('SIZE ' + filename) - if resp[:3] == '213': - s = resp[3:].strip() - try: - return int(s) - except (OverflowError, ValueError): - return long(s) - - def mkd(self, dirname): - '''Make a directory, return its full pathname.''' - resp = self.sendcmd('MKD ' + dirname) - return parse257(resp) - - def rmd(self, dirname): - '''Remove a directory.''' - return self.voidcmd('RMD ' + dirname) - - def pwd(self): - '''Return current working directory.''' - resp = self.sendcmd('PWD') - return parse257(resp) - - def quit(self): - '''Quit, and close the connection.''' - resp = self.voidcmd('QUIT') - self.close() - return resp - - def close(self): - '''Close the connection without assuming anything about it.''' - try: - file = self.file - self.file = None - if file is not None: - file.close() - finally: - sock = self.sock - self.sock = None - if sock is not None: - sock.close() - -try: - import ssl -except ImportError: - pass -else: - class FTP_TLS(FTP): - '''A FTP subclass which adds TLS support to FTP as described - in RFC-4217. - - Connect as usual to port 21 implicitly securing the FTP control - connection before authenticating. - - Securing the data connection requires user to explicitly ask - for it by calling prot_p() method. - - Usage example: - >>> from ftplib import FTP_TLS - >>> ftps = FTP_TLS('ftp.python.org') - >>> ftps.login() # login anonymously previously securing control channel - '230 Guest login ok, access restrictions apply.' - >>> ftps.prot_p() # switch to secure data connection - '200 Protection level set to P' - >>> ftps.retrlines('LIST') # list directory content securely - total 9 - drwxr-xr-x 8 root wheel 1024 Jan 3 1994 . - drwxr-xr-x 8 root wheel 1024 Jan 3 1994 .. 
- drwxr-xr-x 2 root wheel 1024 Jan 3 1994 bin - drwxr-xr-x 2 root wheel 1024 Jan 3 1994 etc - d-wxrwxr-x 2 ftp wheel 1024 Sep 5 13:43 incoming - drwxr-xr-x 2 root wheel 1024 Nov 17 1993 lib - drwxr-xr-x 6 1094 wheel 1024 Sep 13 19:07 pub - drwxr-xr-x 3 root wheel 1024 Jan 3 1994 usr - -rw-r--r-- 1 root root 312 Aug 1 1994 welcome.msg - '226 Transfer complete.' - >>> ftps.quit() - '221 Goodbye.' - >>> - ''' - ssl_version = ssl.PROTOCOL_SSLv23 - - def __init__(self, host='', user='', passwd='', acct='', keyfile=None, - certfile=None, context=None, - timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None): - if context is not None and keyfile is not None: - raise ValueError("context and keyfile arguments are mutually " - "exclusive") - if context is not None and certfile is not None: - raise ValueError("context and certfile arguments are mutually " - "exclusive") - self.keyfile = keyfile - self.certfile = certfile - if context is None: - context = ssl._create_stdlib_context(self.ssl_version, - certfile=certfile, - keyfile=keyfile) - self.context = context - self._prot_p = False - FTP.__init__(self, host, user, passwd, acct, timeout) - - def login(self, user='', passwd='', acct='', secure=True): - if secure and not isinstance(self.sock, ssl.SSLSocket): - self.auth() - return FTP.login(self, user, passwd, acct) - - def auth(self): - '''Set up secure control connection by using TLS/SSL.''' - if isinstance(self.sock, ssl.SSLSocket): - raise ValueError("Already using TLS") - if self.ssl_version >= ssl.PROTOCOL_SSLv23: - resp = self.voidcmd('AUTH TLS') - else: - resp = self.voidcmd('AUTH SSL') - self.sock = self.context.wrap_socket(self.sock, - server_hostname=self.host) - self.file = self.sock.makefile(mode='rb') - return resp - - def prot_p(self): - '''Set up secure data connection.''' - # PROT defines whether or not the data channel is to be protected. - # Though RFC-2228 defines four possible protection levels, - # RFC-4217 only recommends two, Clear and Private. 
- # Clear (PROT C) means that no security is to be used on the - # data-channel, Private (PROT P) means that the data-channel - # should be protected by TLS. - # PBSZ command MUST still be issued, but must have a parameter of - # '0' to indicate that no buffering is taking place and the data - # connection should not be encapsulated. - self.voidcmd('PBSZ 0') - resp = self.voidcmd('PROT P') - self._prot_p = True - return resp - - def prot_c(self): - '''Set up clear text data connection.''' - resp = self.voidcmd('PROT C') - self._prot_p = False - return resp - - # --- Overridden FTP methods - - def ntransfercmd(self, cmd, rest=None): - conn, size = FTP.ntransfercmd(self, cmd, rest) - if self._prot_p: - conn = self.context.wrap_socket(conn, - server_hostname=self.host) - return conn, size - - def retrbinary(self, cmd, callback, blocksize=8192, rest=None): - self.voidcmd('TYPE I') - conn = self.transfercmd(cmd, rest) - try: - while 1: - data = conn.recv(blocksize) - if not data: - break - callback(data) - # shutdown ssl layer - if isinstance(conn, ssl.SSLSocket): - conn.unwrap() - finally: - conn.close() - return self.voidresp() - - def retrlines(self, cmd, callback = None): - if callback is None: callback = print_line - resp = self.sendcmd('TYPE A') - conn = self.transfercmd(cmd) - fp = conn.makefile('rb') - try: - while 1: - line = fp.readline(self.maxline + 1) - if len(line) > self.maxline: - raise Error("got more than %d bytes" % self.maxline) - if self.debugging > 2: print '*retr*', repr(line) - if not line: - break - if line[-2:] == CRLF: - line = line[:-2] - elif line[-1:] == '\n': - line = line[:-1] - callback(line) - # shutdown ssl layer - if isinstance(conn, ssl.SSLSocket): - conn.unwrap() - finally: - fp.close() - conn.close() - return self.voidresp() - - def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None): - self.voidcmd('TYPE I') - conn = self.transfercmd(cmd, rest) - try: - while 1: - buf = fp.read(blocksize) - if not buf: break - 
conn.sendall(buf) - if callback: callback(buf) - # shutdown ssl layer - if isinstance(conn, ssl.SSLSocket): - conn.unwrap() - finally: - conn.close() - return self.voidresp() - - def storlines(self, cmd, fp, callback=None): - self.voidcmd('TYPE A') - conn = self.transfercmd(cmd) - try: - while 1: - buf = fp.readline(self.maxline + 1) - if len(buf) > self.maxline: - raise Error("got more than %d bytes" % self.maxline) - if not buf: break - if buf[-2:] != CRLF: - if buf[-1] in CRLF: buf = buf[:-1] - buf = buf + CRLF - conn.sendall(buf) - if callback: callback(buf) - # shutdown ssl layer - if isinstance(conn, ssl.SSLSocket): - conn.unwrap() - finally: - conn.close() - return self.voidresp() - - __all__.append('FTP_TLS') - all_errors = (Error, IOError, EOFError, ssl.SSLError) - - -_150_re = None - -def parse150(resp): - '''Parse the '150' response for a RETR request. - Returns the expected transfer size or None; size is not guaranteed to - be present in the 150 message. - ''' - if resp[:3] != '150': - raise error_reply, resp - global _150_re - if _150_re is None: - import re - _150_re = re.compile("150 .* \((\d+) bytes\)", re.IGNORECASE) - m = _150_re.match(resp) - if not m: - return None - s = m.group(1) - try: - return int(s) - except (OverflowError, ValueError): - return long(s) - - -_227_re = None - -def parse227(resp): - '''Parse the '227' response for a PASV request. - Raises error_proto if it does not contain '(h1,h2,h3,h4,p1,p2)' - Return ('host.addr.as.numbers', port#) tuple.''' - - if resp[:3] != '227': - raise error_reply, resp - global _227_re - if _227_re is None: - import re - _227_re = re.compile(r'(\d+),(\d+),(\d+),(\d+),(\d+),(\d+)') - m = _227_re.search(resp) - if not m: - raise error_proto, resp - numbers = m.groups() - host = '.'.join(numbers[:4]) - port = (int(numbers[4]) << 8) + int(numbers[5]) - return host, port - - -def parse229(resp, peer): - '''Parse the '229' response for an EPSV request. 
- Raises error_proto if it does not contain '(|||port|)' - Return ('host.addr.as.numbers', port#) tuple.''' - - if resp[:3] != '229': - raise error_reply, resp - left = resp.find('(') - if left < 0: raise error_proto, resp - right = resp.find(')', left + 1) - if right < 0: - raise error_proto, resp # should contain '(|||port|)' - if resp[left + 1] != resp[right - 1]: - raise error_proto, resp - parts = resp[left + 1:right].split(resp[left+1]) - if len(parts) != 5: - raise error_proto, resp - host = peer[0] - port = int(parts[3]) - return host, port - - -def parse257(resp): - '''Parse the '257' response for a MKD or PWD request. - This is a response to a MKD or PWD request: a directory name. - Returns the directoryname in the 257 reply.''' - - if resp[:3] != '257': - raise error_reply, resp - if resp[3:5] != ' "': - return '' # Not compliant to RFC 959, but UNIX ftpd does this - dirname = '' - i = 5 - n = len(resp) - while i < n: - c = resp[i] - i = i+1 - if c == '"': - if i >= n or resp[i] != '"': - break - i = i+1 - dirname = dirname + c - return dirname - - -def print_line(line): - '''Default retrlines callback to print a line.''' - print line - - -def ftpcp(source, sourcename, target, targetname = '', type = 'I'): - '''Copy file from one FTP-instance to another.''' - if not targetname: targetname = sourcename - type = 'TYPE ' + type - source.voidcmd(type) - target.voidcmd(type) - sourcehost, sourceport = parse227(source.sendcmd('PASV')) - target.sendport(sourcehost, sourceport) - # RFC 959: the user must "listen" [...] BEFORE sending the - # transfer request. - # So: STOR before RETR, because here the target is a "user". 
- treply = target.sendcmd('STOR ' + targetname) - if treply[:3] not in ('125', '150'): raise error_proto # RFC 959 - sreply = source.sendcmd('RETR ' + sourcename) - if sreply[:3] not in ('125', '150'): raise error_proto # RFC 959 - source.voidresp() - target.voidresp() - - -class Netrc: - """Class to parse & provide access to 'netrc' format files. - - See the netrc(4) man page for information on the file format. - - WARNING: This class is obsolete -- use module netrc instead. - - """ - __defuser = None - __defpasswd = None - __defacct = None - - def __init__(self, filename=None): - if filename is None: - if "HOME" in os.environ: - filename = os.path.join(os.environ["HOME"], - ".netrc") - else: - raise IOError, \ - "specify file to load or set $HOME" - self.__hosts = {} - self.__macros = {} - fp = open(filename, "r") - in_macro = 0 - while 1: - line = fp.readline(self.maxline + 1) - if len(line) > self.maxline: - raise Error("got more than %d bytes" % self.maxline) - if not line: break - if in_macro and line.strip(): - macro_lines.append(line) - continue - elif in_macro: - self.__macros[macro_name] = tuple(macro_lines) - in_macro = 0 - words = line.split() - host = user = passwd = acct = None - default = 0 - i = 0 - while i < len(words): - w1 = words[i] - if i+1 < len(words): - w2 = words[i + 1] - else: - w2 = None - if w1 == 'default': - default = 1 - elif w1 == 'machine' and w2: - host = w2.lower() - i = i + 1 - elif w1 == 'login' and w2: - user = w2 - i = i + 1 - elif w1 == 'password' and w2: - passwd = w2 - i = i + 1 - elif w1 == 'account' and w2: - acct = w2 - i = i + 1 - elif w1 == 'macdef' and w2: - macro_name = w2 - macro_lines = [] - in_macro = 1 - break - i = i + 1 - if default: - self.__defuser = user or self.__defuser - self.__defpasswd = passwd or self.__defpasswd - self.__defacct = acct or self.__defacct - if host: - if host in self.__hosts: - ouser, opasswd, oacct = \ - self.__hosts[host] - user = user or ouser - passwd = passwd or opasswd - acct = 
acct or oacct - self.__hosts[host] = user, passwd, acct - fp.close() - - def get_hosts(self): - """Return a list of hosts mentioned in the .netrc file.""" - return self.__hosts.keys() - - def get_account(self, host): - """Returns login information for the named host. - - The return value is a triple containing userid, - password, and the accounting field. - - """ - host = host.lower() - user = passwd = acct = None - if host in self.__hosts: - user, passwd, acct = self.__hosts[host] - user = user or self.__defuser - passwd = passwd or self.__defpasswd - acct = acct or self.__defacct - return user, passwd, acct - - def get_macros(self): - """Return a list of all defined macro names.""" - return self.__macros.keys() - - def get_macro(self, macro): - """Return a sequence of lines which define a named macro.""" - return self.__macros[macro] - - - -def test(): - '''Test program. - Usage: ftp [-d] [-r[file]] host [-l[dir]] [-d[dir]] [-p] [file] ... - - -d dir - -l list - -p password - ''' - - if len(sys.argv) < 2: - print test.__doc__ - sys.exit(0) - - debugging = 0 - rcfile = None - while sys.argv[1] == '-d': - debugging = debugging+1 - del sys.argv[1] - if sys.argv[1][:2] == '-r': - # get name of alternate ~/.netrc file: - rcfile = sys.argv[1][2:] - del sys.argv[1] - host = sys.argv[1] - ftp = FTP(host) - ftp.set_debuglevel(debugging) - userid = passwd = acct = '' - try: - netrc = Netrc(rcfile) - except IOError: - if rcfile is not None: - sys.stderr.write("Could not open account file" - " -- using anonymous login.") - else: - try: - userid, passwd, acct = netrc.get_account(host) - except KeyError: - # no account for host - sys.stderr.write( - "No account -- using anonymous login.") - ftp.login(userid, passwd, acct) - for file in sys.argv[2:]: - if file[:2] == '-l': - ftp.dir(file[2:]) - elif file[:2] == '-d': - cmd = 'CWD' - if file[2:]: cmd = cmd + ' ' + file[2:] - resp = ftp.sendcmd(cmd) - elif file == '-p': - ftp.set_pasv(not ftp.passiveserver) - else: - 
ftp.retrbinary('RETR ' + file, \ - sys.stdout.write, 1024) - ftp.quit() - - -if __name__ == '__main__': - test() diff --git a/python/Lib/functools.py b/python/Lib/functools.py deleted file mode 100755 index 53680b8946..0000000000 --- a/python/Lib/functools.py +++ /dev/null @@ -1,100 +0,0 @@ -"""functools.py - Tools for working with functions and callable objects -""" -# Python module wrapper for _functools C module -# to allow utilities written in Python to be added -# to the functools module. -# Written by Nick Coghlan -# Copyright (C) 2006 Python Software Foundation. -# See C source code for _functools credits/copyright - -from _functools import partial, reduce - -# update_wrapper() and wraps() are tools to help write -# wrapper functions that can handle naive introspection - -WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__doc__') -WRAPPER_UPDATES = ('__dict__',) -def update_wrapper(wrapper, - wrapped, - assigned = WRAPPER_ASSIGNMENTS, - updated = WRAPPER_UPDATES): - """Update a wrapper function to look like the wrapped function - - wrapper is the function to be updated - wrapped is the original function - assigned is a tuple naming the attributes assigned directly - from the wrapped function to the wrapper function (defaults to - functools.WRAPPER_ASSIGNMENTS) - updated is a tuple naming the attributes of the wrapper that - are updated with the corresponding attribute from the wrapped - function (defaults to functools.WRAPPER_UPDATES) - """ - for attr in assigned: - setattr(wrapper, attr, getattr(wrapped, attr)) - for attr in updated: - getattr(wrapper, attr).update(getattr(wrapped, attr, {})) - # Return the wrapper so this can be used as a decorator via partial() - return wrapper - -def wraps(wrapped, - assigned = WRAPPER_ASSIGNMENTS, - updated = WRAPPER_UPDATES): - """Decorator factory to apply update_wrapper() to a wrapper function - - Returns a decorator that invokes update_wrapper() with the decorated - function as the wrapper argument and the 
arguments to wraps() as the - remaining arguments. Default arguments are as for update_wrapper(). - This is a convenience function to simplify applying partial() to - update_wrapper(). - """ - return partial(update_wrapper, wrapped=wrapped, - assigned=assigned, updated=updated) - -def total_ordering(cls): - """Class decorator that fills in missing ordering methods""" - convert = { - '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)), - ('__le__', lambda self, other: self < other or self == other), - ('__ge__', lambda self, other: not self < other)], - '__le__': [('__ge__', lambda self, other: not self <= other or self == other), - ('__lt__', lambda self, other: self <= other and not self == other), - ('__gt__', lambda self, other: not self <= other)], - '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)), - ('__ge__', lambda self, other: self > other or self == other), - ('__le__', lambda self, other: not self > other)], - '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other), - ('__gt__', lambda self, other: self >= other and not self == other), - ('__lt__', lambda self, other: not self >= other)] - } - roots = set(dir(cls)) & set(convert) - if not roots: - raise ValueError('must define at least one ordering operation: < > <= >=') - root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ - for opname, opfunc in convert[root]: - if opname not in roots: - opfunc.__name__ = opname - opfunc.__doc__ = getattr(int, opname).__doc__ - setattr(cls, opname, opfunc) - return cls - -def cmp_to_key(mycmp): - """Convert a cmp= function into a key= function""" - class K(object): - __slots__ = ['obj'] - def __init__(self, obj, *args): - self.obj = obj - def __lt__(self, other): - return mycmp(self.obj, other.obj) < 0 - def __gt__(self, other): - return mycmp(self.obj, other.obj) > 0 - def __eq__(self, other): - return mycmp(self.obj, other.obj) == 0 - def __le__(self, other): - return 
mycmp(self.obj, other.obj) <= 0 - def __ge__(self, other): - return mycmp(self.obj, other.obj) >= 0 - def __ne__(self, other): - return mycmp(self.obj, other.obj) != 0 - def __hash__(self): - raise TypeError('hash not implemented') - return K diff --git a/python/Lib/genericpath.py b/python/Lib/genericpath.py deleted file mode 100755 index 2648e5457e..0000000000 --- a/python/Lib/genericpath.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -Path operations common to more than one OS -Do not use directly. The OS specific modules import the appropriate -functions from this module themselves. -""" -import os -import stat - -__all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', - 'getsize', 'isdir', 'isfile'] - - -try: - _unicode = unicode -except NameError: - # If Python is built without Unicode support, the unicode type - # will not exist. Fake one. - class _unicode(object): - pass - -# Does a path exist? -# This is false for dangling symbolic links on systems that support them. -def exists(path): - """Test whether a path exists. Returns False for broken symbolic links""" - try: - os.stat(path) - except os.error: - return False - return True - - -# This follows symbolic links, so both islink() and isdir() can be true -# for the same path on systems that support symlinks -def isfile(path): - """Test whether a path is a regular file""" - try: - st = os.stat(path) - except os.error: - return False - return stat.S_ISREG(st.st_mode) - - -# Is a path a directory? 
-# This follows symbolic links, so both islink() and isdir() -# can be true for the same path on systems that support symlinks -def isdir(s): - """Return true if the pathname refers to an existing directory.""" - try: - st = os.stat(s) - except os.error: - return False - return stat.S_ISDIR(st.st_mode) - - -def getsize(filename): - """Return the size of a file, reported by os.stat().""" - return os.stat(filename).st_size - - -def getmtime(filename): - """Return the last modification time of a file, reported by os.stat().""" - return os.stat(filename).st_mtime - - -def getatime(filename): - """Return the last access time of a file, reported by os.stat().""" - return os.stat(filename).st_atime - - -def getctime(filename): - """Return the metadata change time of a file, reported by os.stat().""" - return os.stat(filename).st_ctime - - -# Return the longest prefix of all list elements. -def commonprefix(m): - "Given a list of pathnames, returns the longest common leading component" - if not m: return '' - s1 = min(m) - s2 = max(m) - for i, c in enumerate(s1): - if c != s2[i]: - return s1[:i] - return s1 - -# Split a path in root and extension. -# The extension is everything starting at the last dot in the last -# pathname component; the root is everything before that. -# It is always true that root + ext == p. - -# Generic implementation of splitext, to be parametrized with -# the separators -def _splitext(p, sep, altsep, extsep): - """Split the extension from a pathname. - - Extension is everything from the last dot to the end, ignoring - leading dots. 
Returns "(root, ext)"; ext may be empty.""" - - sepIndex = p.rfind(sep) - if altsep: - altsepIndex = p.rfind(altsep) - sepIndex = max(sepIndex, altsepIndex) - - dotIndex = p.rfind(extsep) - if dotIndex > sepIndex: - # skip all leading dots - filenameIndex = sepIndex + 1 - while filenameIndex < dotIndex: - if p[filenameIndex] != extsep: - return p[:dotIndex], p[dotIndex:] - filenameIndex += 1 - - return p, '' diff --git a/python/Lib/getopt.py b/python/Lib/getopt.py deleted file mode 100755 index 084aadb52a..0000000000 --- a/python/Lib/getopt.py +++ /dev/null @@ -1,210 +0,0 @@ -"""Parser for command line options. - -This module helps scripts to parse the command line arguments in -sys.argv. It supports the same conventions as the Unix getopt() -function (including the special meanings of arguments of the form `-' -and `--'). Long options similar to those supported by GNU software -may be used as well via an optional third argument. This module -provides two functions and an exception: - -getopt() -- Parse command line options -gnu_getopt() -- Like getopt(), but allow option and non-option arguments -to be intermixed. -GetoptError -- exception (class) raised with 'opt' attribute, which is the -option involved with the exception. -""" - -# Long option support added by Lars Wirzenius . -# -# Gerrit Holl moved the string-based exceptions -# to class-based exceptions. -# -# Peter Astrand added gnu_getopt(). 
-# -# TODO for gnu_getopt(): -# -# - GNU getopt_long_only mechanism -# - allow the caller to specify ordering -# - RETURN_IN_ORDER option -# - GNU extension with '-' as first character of option string -# - optional arguments, specified by double colons -# - an option string with a W followed by semicolon should -# treat "-W foo" as "--foo" - -__all__ = ["GetoptError","error","getopt","gnu_getopt"] - -import os - -class GetoptError(Exception): - opt = '' - msg = '' - def __init__(self, msg, opt=''): - self.msg = msg - self.opt = opt - Exception.__init__(self, msg, opt) - - def __str__(self): - return self.msg - -error = GetoptError # backward compatibility - -def getopt(args, shortopts, longopts = []): - """getopt(args, options[, long_options]) -> opts, args - - Parses command line options and parameter list. args is the - argument list to be parsed, without the leading reference to the - running program. Typically, this means "sys.argv[1:]". shortopts - is the string of option letters that the script wants to - recognize, with options that require an argument followed by a - colon (i.e., the same format that Unix getopt() uses). If - specified, longopts is a list of strings with the names of the - long options which should be supported. The leading '--' - characters should not be included in the option name. Options - which require an argument should be followed by an equal sign - ('='). - - The return value consists of two elements: the first is a list of - (option, value) pairs; the second is the list of program arguments - left after the option list was stripped (this is a trailing slice - of the first argument). Each option-and-value pair returned has - the option as its first element, prefixed with a hyphen (e.g., - '-x'), and the option argument as its second element, or an empty - string if the option has no argument. The options occur in the - list in the same order in which they were found, thus allowing - multiple occurrences. 
Long and short options may be mixed. - - """ - - opts = [] - if type(longopts) == type(""): - longopts = [longopts] - else: - longopts = list(longopts) - while args and args[0].startswith('-') and args[0] != '-': - if args[0] == '--': - args = args[1:] - break - if args[0].startswith('--'): - opts, args = do_longs(opts, args[0][2:], longopts, args[1:]) - else: - opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:]) - - return opts, args - -def gnu_getopt(args, shortopts, longopts = []): - """getopt(args, options[, long_options]) -> opts, args - - This function works like getopt(), except that GNU style scanning - mode is used by default. This means that option and non-option - arguments may be intermixed. The getopt() function stops - processing options as soon as a non-option argument is - encountered. - - If the first character of the option string is `+', or if the - environment variable POSIXLY_CORRECT is set, then option - processing stops as soon as a non-option argument is encountered. - - """ - - opts = [] - prog_args = [] - if isinstance(longopts, str): - longopts = [longopts] - else: - longopts = list(longopts) - - # Allow options after non-option arguments? 
- if shortopts.startswith('+'): - shortopts = shortopts[1:] - all_options_first = True - elif os.environ.get("POSIXLY_CORRECT"): - all_options_first = True - else: - all_options_first = False - - while args: - if args[0] == '--': - prog_args += args[1:] - break - - if args[0][:2] == '--': - opts, args = do_longs(opts, args[0][2:], longopts, args[1:]) - elif args[0][:1] == '-' and args[0] != '-': - opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:]) - else: - if all_options_first: - prog_args += args - break - else: - prog_args.append(args[0]) - args = args[1:] - - return opts, prog_args - -def do_longs(opts, opt, longopts, args): - try: - i = opt.index('=') - except ValueError: - optarg = None - else: - opt, optarg = opt[:i], opt[i+1:] - - has_arg, opt = long_has_args(opt, longopts) - if has_arg: - if optarg is None: - if not args: - raise GetoptError('option --%s requires argument' % opt, opt) - optarg, args = args[0], args[1:] - elif optarg is not None: - raise GetoptError('option --%s must not have an argument' % opt, opt) - opts.append(('--' + opt, optarg or '')) - return opts, args - -# Return: -# has_arg? -# full option name -def long_has_args(opt, longopts): - possibilities = [o for o in longopts if o.startswith(opt)] - if not possibilities: - raise GetoptError('option --%s not recognized' % opt, opt) - # Is there an exact match? - if opt in possibilities: - return False, opt - elif opt + '=' in possibilities: - return True, opt - # No exact match, so better be unique. 
- if len(possibilities) > 1: - # XXX since possibilities contains all valid continuations, might be - # nice to work them into the error msg - raise GetoptError('option --%s not a unique prefix' % opt, opt) - assert len(possibilities) == 1 - unique_match = possibilities[0] - has_arg = unique_match.endswith('=') - if has_arg: - unique_match = unique_match[:-1] - return has_arg, unique_match - -def do_shorts(opts, optstring, shortopts, args): - while optstring != '': - opt, optstring = optstring[0], optstring[1:] - if short_has_arg(opt, shortopts): - if optstring == '': - if not args: - raise GetoptError('option -%s requires argument' % opt, - opt) - optstring, args = args[0], args[1:] - optarg, optstring = optstring, '' - else: - optarg = '' - opts.append(('-' + opt, optarg)) - return opts, args - -def short_has_arg(opt, shortopts): - for i in range(len(shortopts)): - if opt == shortopts[i] != ':': - return shortopts.startswith(':', i+1) - raise GetoptError('option -%s not recognized' % opt, opt) - -if __name__ == '__main__': - import sys - print getopt(sys.argv[1:], "a:b", ["alpha=", "beta"]) diff --git a/python/Lib/getpass.py b/python/Lib/getpass.py deleted file mode 100755 index 2ac6fd7f38..0000000000 --- a/python/Lib/getpass.py +++ /dev/null @@ -1,179 +0,0 @@ -"""Utilities to get a password and/or the current user name. - -getpass(prompt[, stream]) - Prompt for a password, with echo turned off. -getuser() - Get the user name from the environment or password database. - -GetPassWarning - This UserWarning is issued when getpass() cannot prevent - echoing of the password contents while reading. - -On Windows, the msvcrt module will be used. -On the Mac EasyDialogs.AskPassword is used, if available. - -""" - -# Authors: Piers Lauder (original) -# Guido van Rossum (Windows support and cleanup) -# Gregory P. 
Smith (tty support & GetPassWarning) - -import os, sys, warnings - -__all__ = ["getpass","getuser","GetPassWarning"] - - -class GetPassWarning(UserWarning): pass - - -def unix_getpass(prompt='Password: ', stream=None): - """Prompt for a password, with echo turned off. - - Args: - prompt: Written on stream to ask for the input. Default: 'Password: ' - stream: A writable file object to display the prompt. Defaults to - the tty. If no tty is available defaults to sys.stderr. - Returns: - The seKr3t input. - Raises: - EOFError: If our input tty or stdin was closed. - GetPassWarning: When we were unable to turn echo off on the input. - - Always restores terminal settings before returning. - """ - fd = None - tty = None - try: - # Always try reading and writing directly on the tty first. - fd = os.open('/dev/tty', os.O_RDWR|os.O_NOCTTY) - tty = os.fdopen(fd, 'w+', 1) - input = tty - if not stream: - stream = tty - except EnvironmentError, e: - # If that fails, see if stdin can be controlled. - try: - fd = sys.stdin.fileno() - except (AttributeError, ValueError): - passwd = fallback_getpass(prompt, stream) - input = sys.stdin - if not stream: - stream = sys.stderr - - if fd is not None: - passwd = None - try: - old = termios.tcgetattr(fd) # a copy to save - new = old[:] - new[3] &= ~termios.ECHO # 3 == 'lflags' - tcsetattr_flags = termios.TCSAFLUSH - if hasattr(termios, 'TCSASOFT'): - tcsetattr_flags |= termios.TCSASOFT - try: - termios.tcsetattr(fd, tcsetattr_flags, new) - passwd = _raw_input(prompt, stream, input=input) - finally: - termios.tcsetattr(fd, tcsetattr_flags, old) - stream.flush() # issue7208 - except termios.error, e: - if passwd is not None: - # _raw_input succeeded. The final tcsetattr failed. Reraise - # instead of leaving the terminal in an unknown state. - raise - # We can't control the tty or stdin. Give up and use normal IO. - # fallback_getpass() raises an appropriate warning. 
- del input, tty # clean up unused file objects before blocking - passwd = fallback_getpass(prompt, stream) - - stream.write('\n') - return passwd - - -def win_getpass(prompt='Password: ', stream=None): - """Prompt for password with echo off, using Windows getch().""" - if sys.stdin is not sys.__stdin__: - return fallback_getpass(prompt, stream) - import msvcrt - for c in prompt: - msvcrt.putch(c) - pw = "" - while 1: - c = msvcrt.getch() - if c == '\r' or c == '\n': - break - if c == '\003': - raise KeyboardInterrupt - if c == '\b': - pw = pw[:-1] - else: - pw = pw + c - msvcrt.putch('\r') - msvcrt.putch('\n') - return pw - - -def fallback_getpass(prompt='Password: ', stream=None): - warnings.warn("Can not control echo on the terminal.", GetPassWarning, - stacklevel=2) - if not stream: - stream = sys.stderr - print >>stream, "Warning: Password input may be echoed." - return _raw_input(prompt, stream) - - -def _raw_input(prompt="", stream=None, input=None): - # A raw_input() replacement that doesn't save the string in the - # GNU readline history. - if not stream: - stream = sys.stderr - if not input: - input = sys.stdin - prompt = str(prompt) - if prompt: - stream.write(prompt) - stream.flush() - # NOTE: The Python C API calls flockfile() (and unlock) during readline. - line = input.readline() - if not line: - raise EOFError - if line[-1] == '\n': - line = line[:-1] - return line - - -def getuser(): - """Get the username from the environment or password database. - - First try various environment variables, then the password - database. This works on Windows as long as USERNAME is set. 
- - """ - - import os - - for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'): - user = os.environ.get(name) - if user: - return user - - # If this fails, the exception will "explain" why - import pwd - return pwd.getpwuid(os.getuid())[0] - -# Bind the name getpass to the appropriate function -try: - import termios - # it's possible there is an incompatible termios from the - # McMillan Installer, make sure we have a UNIX-compatible termios - termios.tcgetattr, termios.tcsetattr -except (ImportError, AttributeError): - try: - import msvcrt - except ImportError: - try: - from EasyDialogs import AskPassword - except ImportError: - getpass = fallback_getpass - else: - getpass = AskPassword - else: - getpass = win_getpass -else: - getpass = unix_getpass diff --git a/python/Lib/gettext.py b/python/Lib/gettext.py deleted file mode 100755 index 89e54d569a..0000000000 --- a/python/Lib/gettext.py +++ /dev/null @@ -1,667 +0,0 @@ -"""Internationalization and localization support. - -This module provides internationalization (I18N) and localization (L10N) -support for your Python programs by providing an interface to the GNU gettext -message catalog library. - -I18N refers to the operation by which a program is made aware of multiple -languages. L10N refers to the adaptation of your program, once -internationalized, to the local language and cultural habits. - -""" - -# This module represents the integration of work, contributions, feedback, and -# suggestions from the following people: -# -# Martin von Loewis, who wrote the initial implementation of the underlying -# C-based libintlmodule (later renamed _gettext), along with a skeletal -# gettext.py implementation. -# -# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule, -# which also included a pure-Python implementation to read .mo files if -# intlmodule wasn't available. 
-# -# James Henstridge, who also wrote a gettext.py module, which has some -# interesting, but currently unsupported experimental features: the notion of -# a Catalog class and instances, and the ability to add to a catalog file via -# a Python API. -# -# Barry Warsaw integrated these modules, wrote the .install() API and code, -# and conformed all C and Python code to Python's coding standards. -# -# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this -# module. -# -# J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs. -# -# TODO: -# - Lazy loading of .mo files. Currently the entire catalog is loaded into -# memory, but that's probably bad for large translated programs. Instead, -# the lexical sort of original strings in GNU .mo files should be exploited -# to do binary searches and lazy initializations. Or you might want to use -# the undocumented double-hash algorithm for .mo files with hash tables, but -# you'll need to study the GNU gettext code to do this. -# -# - Support Solaris .mo file formats. Unfortunately, we've been unable to -# find this format documented anywhere. - - -import locale, copy, os, re, struct, sys -from errno import ENOENT - - -__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog', - 'find', 'translation', 'install', 'textdomain', 'bindtextdomain', - 'bind_textdomain_codeset', - 'dgettext', 'dngettext', 'gettext', 'lgettext', 'ldgettext', - 'ldngettext', 'lngettext', 'ngettext', - ] - -_default_localedir = os.path.join(sys.prefix, 'share', 'locale') - -# Expression parsing for plural form selection. -# -# The gettext library supports a small subset of C syntax. The only -# incompatible difference is that integer literals starting with zero are -# decimal. 
-# -# https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms -# http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y - -_token_pattern = re.compile(r""" - (?P[ \t]+) | # spaces and horizontal tabs - (?P[0-9]+\b) | # decimal integer - (?Pn\b) | # only n is allowed - (?P[()]) | - (?P[-*/%+?:]|[>, - # <=, >=, ==, !=, &&, ||, - # ? : - # unary and bitwise ops - # not allowed - (?P\w+|.) # invalid token - """, re.VERBOSE|re.DOTALL) - -def _tokenize(plural): - for mo in re.finditer(_token_pattern, plural): - kind = mo.lastgroup - if kind == 'WHITESPACES': - continue - value = mo.group(kind) - if kind == 'INVALID': - raise ValueError('invalid token in plural form: %s' % value) - yield value - yield '' - -def _error(value): - if value: - return ValueError('unexpected token in plural form: %s' % value) - else: - return ValueError('unexpected end of plural form') - -_binary_ops = ( - ('||',), - ('&&',), - ('==', '!='), - ('<', '>', '<=', '>='), - ('+', '-'), - ('*', '/', '%'), -) -_binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops} -_c2py_ops = {'||': 'or', '&&': 'and', '/': '//'} - -def _parse(tokens, priority=-1): - result = '' - nexttok = next(tokens) - while nexttok == '!': - result += 'not ' - nexttok = next(tokens) - - if nexttok == '(': - sub, nexttok = _parse(tokens) - result = '%s(%s)' % (result, sub) - if nexttok != ')': - raise ValueError('unbalanced parenthesis in plural form') - elif nexttok == 'n': - result = '%s%s' % (result, nexttok) - else: - try: - value = int(nexttok, 10) - except ValueError: - raise _error(nexttok) - result = '%s%d' % (result, value) - nexttok = next(tokens) - - j = 100 - while nexttok in _binary_ops: - i = _binary_ops[nexttok] - if i < priority: - break - # Break chained comparisons - if i in (3, 4) and j in (3, 4): # '==', '!=', '<', '>', '<=', '>=' - result = '(%s)' % result - # Replace some C operators by their Python equivalents - op = _c2py_ops.get(nexttok, nexttok) - 
right, nexttok = _parse(tokens, i + 1) - result = '%s %s %s' % (result, op, right) - j = i - if j == priority == 4: # '<', '>', '<=', '>=' - result = '(%s)' % result - - if nexttok == '?' and priority <= 0: - if_true, nexttok = _parse(tokens, 0) - if nexttok != ':': - raise _error(nexttok) - if_false, nexttok = _parse(tokens) - result = '%s if %s else %s' % (if_true, result, if_false) - if priority == 0: - result = '(%s)' % result - - return result, nexttok - -def _as_int(n): - try: - i = round(n) - except TypeError: - raise TypeError('Plural value must be an integer, got %s' % - (n.__class__.__name__,)) - return n - -def c2py(plural): - """Gets a C expression as used in PO files for plural forms and returns a - Python function that implements an equivalent expression. - """ - - if len(plural) > 1000: - raise ValueError('plural form expression is too long') - try: - result, nexttok = _parse(_tokenize(plural)) - if nexttok: - raise _error(nexttok) - - depth = 0 - for c in result: - if c == '(': - depth += 1 - if depth > 20: - # Python compiler limit is about 90. - # The most complex example has 2. - raise ValueError('plural form expression is too complex') - elif c == ')': - depth -= 1 - - ns = {'_as_int': _as_int} - exec('''if 1: - def func(n): - if not isinstance(n, int): - n = _as_int(n) - return int(%s) - ''' % result, ns) - return ns['func'] - except RuntimeError: - # Recursion error can be raised in _parse() or exec(). 
- raise ValueError('plural form expression is too complex') - - -def _expand_lang(locale): - from locale import normalize - locale = normalize(locale) - COMPONENT_CODESET = 1 << 0 - COMPONENT_TERRITORY = 1 << 1 - COMPONENT_MODIFIER = 1 << 2 - # split up the locale into its base components - mask = 0 - pos = locale.find('@') - if pos >= 0: - modifier = locale[pos:] - locale = locale[:pos] - mask |= COMPONENT_MODIFIER - else: - modifier = '' - pos = locale.find('.') - if pos >= 0: - codeset = locale[pos:] - locale = locale[:pos] - mask |= COMPONENT_CODESET - else: - codeset = '' - pos = locale.find('_') - if pos >= 0: - territory = locale[pos:] - locale = locale[:pos] - mask |= COMPONENT_TERRITORY - else: - territory = '' - language = locale - ret = [] - for i in range(mask+1): - if not (i & ~mask): # if all components for this combo exist ... - val = language - if i & COMPONENT_TERRITORY: val += territory - if i & COMPONENT_CODESET: val += codeset - if i & COMPONENT_MODIFIER: val += modifier - ret.append(val) - ret.reverse() - return ret - - - -class NullTranslations: - def __init__(self, fp=None): - self._info = {} - self._charset = None - self._output_charset = None - self._fallback = None - if fp is not None: - self._parse(fp) - - def _parse(self, fp): - pass - - def add_fallback(self, fallback): - if self._fallback: - self._fallback.add_fallback(fallback) - else: - self._fallback = fallback - - def gettext(self, message): - if self._fallback: - return self._fallback.gettext(message) - return message - - def lgettext(self, message): - if self._fallback: - return self._fallback.lgettext(message) - return message - - def ngettext(self, msgid1, msgid2, n): - if self._fallback: - return self._fallback.ngettext(msgid1, msgid2, n) - if n == 1: - return msgid1 - else: - return msgid2 - - def lngettext(self, msgid1, msgid2, n): - if self._fallback: - return self._fallback.lngettext(msgid1, msgid2, n) - if n == 1: - return msgid1 - else: - return msgid2 - - def 
ugettext(self, message): - if self._fallback: - return self._fallback.ugettext(message) - return unicode(message) - - def ungettext(self, msgid1, msgid2, n): - if self._fallback: - return self._fallback.ungettext(msgid1, msgid2, n) - if n == 1: - return unicode(msgid1) - else: - return unicode(msgid2) - - def info(self): - return self._info - - def charset(self): - return self._charset - - def output_charset(self): - return self._output_charset - - def set_output_charset(self, charset): - self._output_charset = charset - - def install(self, unicode=False, names=None): - import __builtin__ - __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext - if hasattr(names, "__contains__"): - if "gettext" in names: - __builtin__.__dict__['gettext'] = __builtin__.__dict__['_'] - if "ngettext" in names: - __builtin__.__dict__['ngettext'] = (unicode and self.ungettext - or self.ngettext) - if "lgettext" in names: - __builtin__.__dict__['lgettext'] = self.lgettext - if "lngettext" in names: - __builtin__.__dict__['lngettext'] = self.lngettext - - -class GNUTranslations(NullTranslations): - # Magic number of .mo files - LE_MAGIC = 0x950412deL - BE_MAGIC = 0xde120495L - - def _parse(self, fp): - """Override this method to support alternative .mo formats.""" - unpack = struct.unpack - filename = getattr(fp, 'name', '') - # Parse the .mo file header, which consists of 5 little endian 32 - # bit words. - self._catalog = catalog = {} - self.plural = lambda n: int(n != 1) # germanic plural by default - buf = fp.read() - buflen = len(buf) - # Are we big endian or little endian? - magic = unpack('4I', buf[4:20]) - ii = '>II' - else: - raise IOError(0, 'Bad magic number', filename) - # Now put all messages from the .mo file buffer into the catalog - # dictionary. 
- for i in xrange(0, msgcount): - mlen, moff = unpack(ii, buf[masteridx:masteridx+8]) - mend = moff + mlen - tlen, toff = unpack(ii, buf[transidx:transidx+8]) - tend = toff + tlen - if mend < buflen and tend < buflen: - msg = buf[moff:mend] - tmsg = buf[toff:tend] - else: - raise IOError(0, 'File is corrupt', filename) - # See if we're looking at GNU .mo conventions for metadata - if mlen == 0: - # Catalog description - lastk = None - for item in tmsg.splitlines(): - item = item.strip() - if not item: - continue - k = v = None - if ':' in item: - k, v = item.split(':', 1) - k = k.strip().lower() - v = v.strip() - self._info[k] = v - lastk = k - elif lastk: - self._info[lastk] += '\n' + item - if k == 'content-type': - self._charset = v.split('charset=')[1] - elif k == 'plural-forms': - v = v.split(';') - plural = v[1].split('plural=')[1] - self.plural = c2py(plural) - # Note: we unconditionally convert both msgids and msgstrs to - # Unicode using the character encoding specified in the charset - # parameter of the Content-Type header. The gettext documentation - # strongly encourages msgids to be us-ascii, but some applications - # require alternative encodings (e.g. Zope's ZCML and ZPT). For - # traditional gettext applications, the msgid conversion will - # cause no problems since us-ascii should always be a subset of - # the charset encoding. We may want to fall back to 8-bit msgids - # if the Unicode conversion fails. 
- if '\x00' in msg: - # Plural forms - msgid1, msgid2 = msg.split('\x00') - tmsg = tmsg.split('\x00') - if self._charset: - msgid1 = unicode(msgid1, self._charset) - tmsg = [unicode(x, self._charset) for x in tmsg] - for i in range(len(tmsg)): - catalog[(msgid1, i)] = tmsg[i] - else: - if self._charset: - msg = unicode(msg, self._charset) - tmsg = unicode(tmsg, self._charset) - catalog[msg] = tmsg - # advance to next entry in the seek tables - masteridx += 8 - transidx += 8 - - def gettext(self, message): - missing = object() - tmsg = self._catalog.get(message, missing) - if tmsg is missing: - if self._fallback: - return self._fallback.gettext(message) - return message - # Encode the Unicode tmsg back to an 8-bit string, if possible - if self._output_charset: - return tmsg.encode(self._output_charset) - elif self._charset: - return tmsg.encode(self._charset) - return tmsg - - def lgettext(self, message): - missing = object() - tmsg = self._catalog.get(message, missing) - if tmsg is missing: - if self._fallback: - return self._fallback.lgettext(message) - return message - if self._output_charset: - return tmsg.encode(self._output_charset) - return tmsg.encode(locale.getpreferredencoding()) - - def ngettext(self, msgid1, msgid2, n): - try: - tmsg = self._catalog[(msgid1, self.plural(n))] - if self._output_charset: - return tmsg.encode(self._output_charset) - elif self._charset: - return tmsg.encode(self._charset) - return tmsg - except KeyError: - if self._fallback: - return self._fallback.ngettext(msgid1, msgid2, n) - if n == 1: - return msgid1 - else: - return msgid2 - - def lngettext(self, msgid1, msgid2, n): - try: - tmsg = self._catalog[(msgid1, self.plural(n))] - if self._output_charset: - return tmsg.encode(self._output_charset) - return tmsg.encode(locale.getpreferredencoding()) - except KeyError: - if self._fallback: - return self._fallback.lngettext(msgid1, msgid2, n) - if n == 1: - return msgid1 - else: - return msgid2 - - def ugettext(self, message): - 
missing = object() - tmsg = self._catalog.get(message, missing) - if tmsg is missing: - if self._fallback: - return self._fallback.ugettext(message) - return unicode(message) - return tmsg - - def ungettext(self, msgid1, msgid2, n): - try: - tmsg = self._catalog[(msgid1, self.plural(n))] - except KeyError: - if self._fallback: - return self._fallback.ungettext(msgid1, msgid2, n) - if n == 1: - tmsg = unicode(msgid1) - else: - tmsg = unicode(msgid2) - return tmsg - - -# Locate a .mo file using the gettext strategy -def find(domain, localedir=None, languages=None, all=0): - # Get some reasonable defaults for arguments that were not supplied - if localedir is None: - localedir = _default_localedir - if languages is None: - languages = [] - for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): - val = os.environ.get(envar) - if val: - languages = val.split(':') - break - if 'C' not in languages: - languages.append('C') - # now normalize and expand the languages - nelangs = [] - for lang in languages: - for nelang in _expand_lang(lang): - if nelang not in nelangs: - nelangs.append(nelang) - # select a language - if all: - result = [] - else: - result = None - for lang in nelangs: - if lang == 'C': - break - mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain) - if os.path.exists(mofile): - if all: - result.append(mofile) - else: - return mofile - return result - - - -# a mapping between absolute .mo file path and Translation object -_translations = {} - -def translation(domain, localedir=None, languages=None, - class_=None, fallback=False, codeset=None): - if class_ is None: - class_ = GNUTranslations - mofiles = find(domain, localedir, languages, all=1) - if not mofiles: - if fallback: - return NullTranslations() - raise IOError(ENOENT, 'No translation file found for domain', domain) - # Avoid opening, reading, and parsing the .mo file after it's been done - # once. 
- result = None - for mofile in mofiles: - key = (class_, os.path.abspath(mofile)) - t = _translations.get(key) - if t is None: - with open(mofile, 'rb') as fp: - t = _translations.setdefault(key, class_(fp)) - # Copy the translation object to allow setting fallbacks and - # output charset. All other instance data is shared with the - # cached object. - t = copy.copy(t) - if codeset: - t.set_output_charset(codeset) - if result is None: - result = t - else: - result.add_fallback(t) - return result - - -def install(domain, localedir=None, unicode=False, codeset=None, names=None): - t = translation(domain, localedir, fallback=True, codeset=codeset) - t.install(unicode, names) - - - -# a mapping b/w domains and locale directories -_localedirs = {} -# a mapping b/w domains and codesets -_localecodesets = {} -# current global domain, `messages' used for compatibility w/ GNU gettext -_current_domain = 'messages' - - -def textdomain(domain=None): - global _current_domain - if domain is not None: - _current_domain = domain - return _current_domain - - -def bindtextdomain(domain, localedir=None): - global _localedirs - if localedir is not None: - _localedirs[domain] = localedir - return _localedirs.get(domain, _default_localedir) - - -def bind_textdomain_codeset(domain, codeset=None): - global _localecodesets - if codeset is not None: - _localecodesets[domain] = codeset - return _localecodesets.get(domain) - - -def dgettext(domain, message): - try: - t = translation(domain, _localedirs.get(domain, None), - codeset=_localecodesets.get(domain)) - except IOError: - return message - return t.gettext(message) - -def ldgettext(domain, message): - try: - t = translation(domain, _localedirs.get(domain, None), - codeset=_localecodesets.get(domain)) - except IOError: - return message - return t.lgettext(message) - -def dngettext(domain, msgid1, msgid2, n): - try: - t = translation(domain, _localedirs.get(domain, None), - codeset=_localecodesets.get(domain)) - except IOError: - if n == 
1: - return msgid1 - else: - return msgid2 - return t.ngettext(msgid1, msgid2, n) - -def ldngettext(domain, msgid1, msgid2, n): - try: - t = translation(domain, _localedirs.get(domain, None), - codeset=_localecodesets.get(domain)) - except IOError: - if n == 1: - return msgid1 - else: - return msgid2 - return t.lngettext(msgid1, msgid2, n) - -def gettext(message): - return dgettext(_current_domain, message) - -def lgettext(message): - return ldgettext(_current_domain, message) - -def ngettext(msgid1, msgid2, n): - return dngettext(_current_domain, msgid1, msgid2, n) - -def lngettext(msgid1, msgid2, n): - return ldngettext(_current_domain, msgid1, msgid2, n) - -# dcgettext() has been deemed unnecessary and is not implemented. - -# James Henstridge's Catalog constructor from GNOME gettext. Documented usage -# was: -# -# import gettext -# cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR) -# _ = cat.gettext -# print _('Hello World') - -# The resulting catalog object currently don't support access through a -# dictionary API, which was supported (but apparently unused) in GNOME -# gettext. - -Catalog = translation diff --git a/python/Lib/glob.py b/python/Lib/glob.py deleted file mode 100755 index b3d9ec1b1f..0000000000 --- a/python/Lib/glob.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Filename globbing utility.""" - -import sys -import os -import re -import fnmatch - -try: - _unicode = unicode -except NameError: - # If Python is built without Unicode support, the unicode type - # will not exist. Fake one. - class _unicode(object): - pass - -__all__ = ["glob", "iglob"] - -def glob(pathname): - """Return a list of paths matching a pathname pattern. - - The pattern may contain simple shell-style wildcards a la - fnmatch. However, unlike fnmatch, filenames starting with a - dot are special cases that are not matched by '*' and '?' - patterns. 
- - """ - return list(iglob(pathname)) - -def iglob(pathname): - """Return an iterator which yields the paths matching a pathname pattern. - - The pattern may contain simple shell-style wildcards a la - fnmatch. However, unlike fnmatch, filenames starting with a - dot are special cases that are not matched by '*' and '?' - patterns. - - """ - dirname, basename = os.path.split(pathname) - if not has_magic(pathname): - if basename: - if os.path.lexists(pathname): - yield pathname - else: - # Patterns ending with a slash should match only directories - if os.path.isdir(dirname): - yield pathname - return - if not dirname: - for name in glob1(os.curdir, basename): - yield name - return - # `os.path.split()` returns the argument itself as a dirname if it is a - # drive or UNC path. Prevent an infinite recursion if a drive or UNC path - # contains magic characters (i.e. r'\\?\C:'). - if dirname != pathname and has_magic(dirname): - dirs = iglob(dirname) - else: - dirs = [dirname] - if has_magic(basename): - glob_in_dir = glob1 - else: - glob_in_dir = glob0 - for dirname in dirs: - for name in glob_in_dir(dirname, basename): - yield os.path.join(dirname, name) - -# These 2 helper functions non-recursively glob inside a literal directory. -# They return a list of basenames. `glob1` accepts a pattern while `glob0` -# takes a literal basename (so it only has to check for its existence). - -def glob1(dirname, pattern): - if not dirname: - dirname = os.curdir - if isinstance(pattern, _unicode) and not isinstance(dirname, unicode): - dirname = unicode(dirname, sys.getfilesystemencoding() or - sys.getdefaultencoding()) - try: - names = os.listdir(dirname) - except os.error: - return [] - if pattern[0] != '.': - names = filter(lambda x: x[0] != '.', names) - return fnmatch.filter(names, pattern) - -def glob0(dirname, basename): - if basename == '': - # `os.path.split()` returns an empty basename for paths ending with a - # directory separator. 
'q*x/' should match only directories. - if os.path.isdir(dirname): - return [basename] - else: - if os.path.lexists(os.path.join(dirname, basename)): - return [basename] - return [] - - -magic_check = re.compile('[*?[]') - -def has_magic(s): - return magic_check.search(s) is not None diff --git a/python/Lib/gzip.py b/python/Lib/gzip.py deleted file mode 100755 index 07c6db493b..0000000000 --- a/python/Lib/gzip.py +++ /dev/null @@ -1,527 +0,0 @@ -"""Functions that read and write gzipped files. - -The user of the file doesn't have to worry about the compression, -but random access is not allowed.""" - -# based on Andrew Kuchling's minigzip.py distributed with the zlib module - -import struct, sys, time, os -import zlib -import io -import __builtin__ - -__all__ = ["GzipFile","open"] - -FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 - -READ, WRITE = 1, 2 - -def write32u(output, value): - # The L format writes the bit pattern correctly whether signed - # or unsigned. - output.write(struct.pack("' - - def _check_closed(self): - """Raises a ValueError if the underlying file object has been closed. - - """ - if self.closed: - raise ValueError('I/O operation on closed file.') - - def _init_write(self, filename): - self.name = filename - self.crc = zlib.crc32("") & 0xffffffffL - self.size = 0 - self.writebuf = [] - self.bufsize = 0 - - def _write_gzip_header(self): - self.fileobj.write('\037\213') # magic header - self.fileobj.write('\010') # compression method - try: - # RFC 1952 requires the FNAME field to be Latin-1. Do not - # include filenames that cannot be represented that way. 
- fname = os.path.basename(self.name) - if not isinstance(fname, str): - fname = fname.encode('latin-1') - if fname.endswith('.gz'): - fname = fname[:-3] - except UnicodeEncodeError: - fname = '' - flags = 0 - if fname: - flags = FNAME - self.fileobj.write(chr(flags)) - mtime = self.mtime - if mtime is None: - mtime = time.time() - write32u(self.fileobj, long(mtime)) - self.fileobj.write('\002') - self.fileobj.write('\377') - if fname: - self.fileobj.write(fname + '\000') - - def _init_read(self): - self.crc = zlib.crc32("") & 0xffffffffL - self.size = 0 - - def _read_gzip_header(self): - magic = self.fileobj.read(2) - if magic != '\037\213': - raise IOError, 'Not a gzipped file' - method = ord( self.fileobj.read(1) ) - if method != 8: - raise IOError, 'Unknown compression method' - flag = ord( self.fileobj.read(1) ) - self.mtime = read32(self.fileobj) - # extraflag = self.fileobj.read(1) - # os = self.fileobj.read(1) - self.fileobj.read(2) - - if flag & FEXTRA: - # Read & discard the extra field, if present - xlen = ord(self.fileobj.read(1)) - xlen = xlen + 256*ord(self.fileobj.read(1)) - self.fileobj.read(xlen) - if flag & FNAME: - # Read and discard a null-terminated string containing the filename - while True: - s = self.fileobj.read(1) - if not s or s=='\000': - break - if flag & FCOMMENT: - # Read and discard a null-terminated string containing a comment - while True: - s = self.fileobj.read(1) - if not s or s=='\000': - break - if flag & FHCRC: - self.fileobj.read(2) # Read & discard the 16-bit header CRC - - def write(self,data): - self._check_closed() - if self.mode != WRITE: - import errno - raise IOError(errno.EBADF, "write() on read-only GzipFile object") - - if self.fileobj is None: - raise ValueError, "write() on closed GzipFile object" - - # Convert data type if called by io.BufferedWriter. 
- if isinstance(data, memoryview): - data = data.tobytes() - - if len(data) > 0: - self.fileobj.write(self.compress.compress(data)) - self.size += len(data) - self.crc = zlib.crc32(data, self.crc) & 0xffffffffL - self.offset += len(data) - - return len(data) - - def read(self, size=-1): - self._check_closed() - if self.mode != READ: - import errno - raise IOError(errno.EBADF, "read() on write-only GzipFile object") - - if self.extrasize <= 0 and self.fileobj is None: - return '' - - readsize = 1024 - if size < 0: # get the whole thing - try: - while True: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - size = self.extrasize - else: # just get some more of it - try: - while size > self.extrasize: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - if size > self.extrasize: - size = self.extrasize - - offset = self.offset - self.extrastart - chunk = self.extrabuf[offset: offset + size] - self.extrasize = self.extrasize - size - - self.offset += size - return chunk - - def _unread(self, buf): - self.extrasize = len(buf) + self.extrasize - self.offset -= len(buf) - - def _read(self, size=1024): - if self.fileobj is None: - raise EOFError, "Reached EOF" - - if self._new_member: - # If the _new_member flag is set, we have to - # jump to the next member, if there is one. - # - # First, check if we're at the end of the file; - # if so, it's time to stop; no more members to read. 
- pos = self.fileobj.tell() # Save current position - self.fileobj.seek(0, 2) # Seek to end of file - if pos == self.fileobj.tell(): - raise EOFError, "Reached EOF" - else: - self.fileobj.seek( pos ) # Return to original position - - self._init_read() - self._read_gzip_header() - self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) - self._new_member = False - - # Read a chunk of data from the file - buf = self.fileobj.read(size) - - # If the EOF has been reached, flush the decompression object - # and mark this object as finished. - - if buf == "": - uncompress = self.decompress.flush() - self._read_eof() - self._add_read_data( uncompress ) - raise EOFError, 'Reached EOF' - - uncompress = self.decompress.decompress(buf) - self._add_read_data( uncompress ) - - if self.decompress.unused_data != "": - # Ending case: we've come to the end of a member in the file, - # so seek back to the start of the unused data, finish up - # this member, and read a new gzip header. - # (The number of bytes to seek back is the length of the unused - # data, minus 8 because _read_eof() will rewind a further 8 bytes) - self.fileobj.seek( -len(self.decompress.unused_data)+8, 1) - - # Check the CRC and file size, and set the flag so we read - # a new member on the next call - self._read_eof() - self._new_member = True - - def _add_read_data(self, data): - self.crc = zlib.crc32(data, self.crc) & 0xffffffffL - offset = self.offset - self.extrastart - self.extrabuf = self.extrabuf[offset:] + data - self.extrasize = self.extrasize + len(data) - self.extrastart = self.offset - self.size = self.size + len(data) - - def _read_eof(self): - # We've read to the end of the file, so we have to rewind in order - # to reread the 8 bytes containing the CRC and the file size. - # We check the that the computed CRC and size of the - # uncompressed data matches the stored values. Note that the size - # stored is the true file size mod 2**32. 
- self.fileobj.seek(-8, 1) - crc32 = read32(self.fileobj) - isize = read32(self.fileobj) # may exceed 2GB - if crc32 != self.crc: - raise IOError("CRC check failed %s != %s" % (hex(crc32), - hex(self.crc))) - elif isize != (self.size & 0xffffffffL): - raise IOError, "Incorrect length of data produced" - - # Gzip files can be padded with zeroes and still have archives. - # Consume all zero bytes and set the file position to the first - # non-zero byte. See http://www.gzip.org/#faq8 - c = "\x00" - while c == "\x00": - c = self.fileobj.read(1) - if c: - self.fileobj.seek(-1, 1) - - @property - def closed(self): - return self.fileobj is None - - def close(self): - fileobj = self.fileobj - if fileobj is None: - return - self.fileobj = None - try: - if self.mode == WRITE: - fileobj.write(self.compress.flush()) - write32u(fileobj, self.crc) - # self.size may exceed 2GB, or even 4GB - write32u(fileobj, self.size & 0xffffffffL) - finally: - myfileobj = self.myfileobj - if myfileobj: - self.myfileobj = None - myfileobj.close() - - def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): - self._check_closed() - if self.mode == WRITE: - # Ensure the compressor's buffer is flushed - self.fileobj.write(self.compress.flush(zlib_mode)) - self.fileobj.flush() - - def fileno(self): - """Invoke the underlying file object's fileno() method. - - This will raise AttributeError if the underlying file object - doesn't support fileno(). 
- """ - return self.fileobj.fileno() - - def rewind(self): - '''Return the uncompressed stream file position indicator to the - beginning of the file''' - if self.mode != READ: - raise IOError("Can't rewind in write mode") - self.fileobj.seek(0) - self._new_member = True - self.extrabuf = "" - self.extrasize = 0 - self.extrastart = 0 - self.offset = 0 - - def readable(self): - return self.mode == READ - - def writable(self): - return self.mode == WRITE - - def seekable(self): - return True - - def seek(self, offset, whence=0): - if whence: - if whence == 1: - offset = self.offset + offset - else: - raise ValueError('Seek from end not supported') - if self.mode == WRITE: - if offset < self.offset: - raise IOError('Negative seek in write mode') - count = offset - self.offset - for i in xrange(count // 1024): - self.write(1024 * '\0') - self.write((count % 1024) * '\0') - elif self.mode == READ: - if offset < self.offset: - # for negative seek, rewind and do positive seek - self.rewind() - count = offset - self.offset - for i in xrange(count // 1024): - self.read(1024) - self.read(count % 1024) - - return self.offset - - def readline(self, size=-1): - if size < 0: - # Shortcut common case - newline found in buffer. - offset = self.offset - self.extrastart - i = self.extrabuf.find('\n', offset) + 1 - if i > 0: - self.extrasize -= i - offset - self.offset += i - offset - return self.extrabuf[offset: i] - - size = sys.maxint - readsize = self.min_readsize - else: - readsize = size - bufs = [] - while size != 0: - c = self.read(readsize) - i = c.find('\n') - - # We set i=size to break out of the loop under two - # conditions: 1) there's no newline, and the chunk is - # larger than size, or 2) there is a newline, but the - # resulting line would be longer than 'size'. 
- if (size <= i) or (i == -1 and len(c) > size): - i = size - 1 - - if i >= 0 or c == '': - bufs.append(c[:i + 1]) # Add portion of last chunk - self._unread(c[i + 1:]) # Push back rest of chunk - break - - # Append chunk to list, decrease 'size', - bufs.append(c) - size = size - len(c) - readsize = min(size, readsize * 2) - if readsize > self.min_readsize: - self.min_readsize = min(readsize, self.min_readsize * 2, 512) - return ''.join(bufs) # Return resulting line - - -def _test(): - # Act like gzip; with -d, act like gunzip. - # The input file is not deleted, however, nor are any other gzip - # options or features supported. - args = sys.argv[1:] - decompress = args and args[0] == "-d" - if decompress: - args = args[1:] - if not args: - args = ["-"] - for arg in args: - if decompress: - if arg == "-": - f = GzipFile(filename="", mode="rb", fileobj=sys.stdin) - g = sys.stdout - else: - if arg[-3:] != ".gz": - print "filename doesn't end in .gz:", repr(arg) - continue - f = open(arg, "rb") - g = __builtin__.open(arg[:-3], "wb") - else: - if arg == "-": - f = sys.stdin - g = GzipFile(filename="", mode="wb", fileobj=sys.stdout) - else: - f = __builtin__.open(arg, "rb") - g = open(arg + ".gz", "wb") - while True: - chunk = f.read(1024) - if not chunk: - break - g.write(chunk) - if g is not sys.stdout: - g.close() - if f is not sys.stdin: - f.close() - -if __name__ == '__main__': - _test() diff --git a/python/Lib/hashlib.py b/python/Lib/hashlib.py deleted file mode 100755 index bbd06b9996..0000000000 --- a/python/Lib/hashlib.py +++ /dev/null @@ -1,221 +0,0 @@ -# $Id$ -# -# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org) -# Licensed to PSF under a Contributor Agreement. -# - -__doc__ = """hashlib module - A common interface to many hash functions. - -new(name, string='') - returns a new hash object implementing the - given hash function; initializing the hash - using the given string data. 
- -Named constructor functions are also available, these are much faster -than using new(): - -md5(), sha1(), sha224(), sha256(), sha384(), and sha512() - -More algorithms may be available on your platform but the above are guaranteed -to exist. See the algorithms_guaranteed and algorithms_available attributes -to find out what algorithm names can be passed to new(). - -NOTE: If you want the adler32 or crc32 hash functions they are available in -the zlib module. - -Choose your hash function wisely. Some have known collision weaknesses. -sha384 and sha512 will be slow on 32 bit platforms. - -Hash objects have these methods: - - update(arg): Update the hash object with the string arg. Repeated calls - are equivalent to a single call with the concatenation of all - the arguments. - - digest(): Return the digest of the strings passed to the update() method - so far. This may contain non-ASCII characters, including - NUL bytes. - - hexdigest(): Like digest() except the digest is returned as a string of - double length, containing only hexadecimal digits. - - copy(): Return a copy (clone) of the hash object. This can be used to - efficiently compute the digests of strings that share a common - initial substring. - -For example, to obtain the digest of the string 'Nobody inspects the -spammish repetition': - - >>> import hashlib - >>> m = hashlib.md5() - >>> m.update("Nobody inspects") - >>> m.update(" the spammish repetition") - >>> m.digest() - '\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' - -More condensed: - - >>> hashlib.sha224("Nobody inspects the spammish repetition").hexdigest() - 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' - -""" - -# This tuple and __get_builtin_constructor() must be modified if a new -# always available algorithm is added. 
-__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512') - -algorithms_guaranteed = set(__always_supported) -algorithms_available = set(__always_supported) - -algorithms = __always_supported - -__all__ = __always_supported + ('new', 'algorithms_guaranteed', - 'algorithms_available', 'algorithms', - 'pbkdf2_hmac') - - -def __get_builtin_constructor(name): - try: - if name in ('SHA1', 'sha1'): - import _sha - return _sha.new - elif name in ('MD5', 'md5'): - import _md5 - return _md5.new - elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): - import _sha256 - bs = name[3:] - if bs == '256': - return _sha256.sha256 - elif bs == '224': - return _sha256.sha224 - elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): - import _sha512 - bs = name[3:] - if bs == '512': - return _sha512.sha512 - elif bs == '384': - return _sha512.sha384 - except ImportError: - pass # no extension module, this hash is unsupported. - - raise ValueError('unsupported hash type ' + name) - - -def __get_openssl_constructor(name): - try: - f = getattr(_hashlib, 'openssl_' + name) - # Allow the C module to raise ValueError. The function will be - # defined but the hash not actually available thanks to OpenSSL. - f() - # Use the C function directly (very fast) - return f - except (AttributeError, ValueError): - return __get_builtin_constructor(name) - - -def __py_new(name, string=''): - """new(name, string='') - Return a new hashing object using the named algorithm; - optionally initialized with a string. - """ - return __get_builtin_constructor(name)(string) - - -def __hash_new(name, string=''): - """new(name, string='') - Return a new hashing object using the named algorithm; - optionally initialized with a string. - """ - try: - return _hashlib.new(name, string) - except ValueError: - # If the _hashlib module (OpenSSL) doesn't support the named - # hash, try using our builtin implementations. 
- # This allows for SHA224/256 and SHA384/512 support even though - # the OpenSSL library prior to 0.9.8 doesn't provide them. - return __get_builtin_constructor(name)(string) - - -try: - import _hashlib - new = __hash_new - __get_hash = __get_openssl_constructor - algorithms_available = algorithms_available.union( - _hashlib.openssl_md_meth_names) -except ImportError: - new = __py_new - __get_hash = __get_builtin_constructor - -for __func_name in __always_supported: - # try them all, some may not work due to the OpenSSL - # version not supporting that algorithm. - try: - globals()[__func_name] = __get_hash(__func_name) - except ValueError: - import logging - logging.exception('code for hash %s was not found.', __func_name) - - -try: - # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA - from _hashlib import pbkdf2_hmac -except ImportError: - import binascii - import struct - - _trans_5C = b"".join(chr(x ^ 0x5C) for x in range(256)) - _trans_36 = b"".join(chr(x ^ 0x36) for x in range(256)) - - def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None): - """Password based key derivation function 2 (PKCS #5 v2.0) - - This Python implementations based on the hmac module about as fast - as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster - for long passwords. 
- """ - if not isinstance(hash_name, str): - raise TypeError(hash_name) - - if not isinstance(password, (bytes, bytearray)): - password = bytes(buffer(password)) - if not isinstance(salt, (bytes, bytearray)): - salt = bytes(buffer(salt)) - - # Fast inline HMAC implementation - inner = new(hash_name) - outer = new(hash_name) - blocksize = getattr(inner, 'block_size', 64) - if len(password) > blocksize: - password = new(hash_name, password).digest() - password = password + b'\x00' * (blocksize - len(password)) - inner.update(password.translate(_trans_36)) - outer.update(password.translate(_trans_5C)) - - def prf(msg, inner=inner, outer=outer): - # PBKDF2_HMAC uses the password as key. We can re-use the same - # digest objects and just update copies to skip initialization. - icpy = inner.copy() - ocpy = outer.copy() - icpy.update(msg) - ocpy.update(icpy.digest()) - return ocpy.digest() - - if iterations < 1: - raise ValueError(iterations) - if dklen is None: - dklen = outer.digest_size - if dklen < 1: - raise ValueError(dklen) - - hex_format_string = "%%0%ix" % (new(hash_name).digest_size * 2) - - dkey = b'' - loop = 1 - while len(dkey) < dklen: - prev = prf(salt + struct.pack(b'>I', loop)) - rkey = int(binascii.hexlify(prev), 16) - for i in xrange(iterations - 1): - prev = prf(prev) - rkey ^= int(binascii.hexlify(prev), 16) - loop += 1 - dkey += binascii.unhexlify(hex_format_string % rkey) - - return dkey[:dklen] - -# Cleanup locals() -del __always_supported, __func_name, __get_hash -del __py_new, __hash_new, __get_openssl_constructor diff --git a/python/Lib/heapq.py b/python/Lib/heapq.py deleted file mode 100755 index f8269e15e4..0000000000 --- a/python/Lib/heapq.py +++ /dev/null @@ -1,485 +0,0 @@ -# -*- coding: latin-1 -*- - -"""Heap queue algorithm (a.k.a. priority queue). - -Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for -all k, counting elements from 0. For the sake of comparison, -non-existing elements are considered to be infinite. 
The interesting -property of a heap is that a[0] is always its smallest element. - -Usage: - -heap = [] # creates an empty heap -heappush(heap, item) # pushes a new item on the heap -item = heappop(heap) # pops the smallest item from the heap -item = heap[0] # smallest item on the heap without popping it -heapify(x) # transforms list into a heap, in-place, in linear time -item = heapreplace(heap, item) # pops and returns smallest item, and adds - # new item; the heap size is unchanged - -Our API differs from textbook heap algorithms as follows: - -- We use 0-based indexing. This makes the relationship between the - index for a node and the indexes for its children slightly less - obvious, but is more suitable since Python uses 0-based indexing. - -- Our heappop() method returns the smallest item, not the largest. - -These two make it possible to view the heap as a regular Python list -without surprises: heap[0] is the smallest item, and heap.sort() -maintains the heap invariant! -""" - -# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger - -__about__ = """Heap queues - -[explanation by François Pinard] - -Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for -all k, counting elements from 0. For the sake of comparison, -non-existing elements are considered to be infinite. The interesting -property of a heap is that a[0] is always its smallest element. - -The strange invariant above is meant to be an efficient memory -representation for a tournament. The numbers below are `k', not a[k]: - - 0 - - 1 2 - - 3 4 5 6 - - 7 8 9 10 11 12 13 14 - - 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 - - -In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In -a usual binary tournament we see in sports, each cell is the winner -over the two cells it tops, and we can trace the winner down the tree -to see all opponents s/he had. 
However, in many computer applications -of such tournaments, we do not need to trace the history of a winner. -To be more memory efficient, when a winner is promoted, we try to -replace it by something else at a lower level, and the rule becomes -that a cell and the two cells it tops contain three different items, -but the top cell "wins" over the two topped cells. - -If this heap invariant is protected at all time, index 0 is clearly -the overall winner. The simplest algorithmic way to remove it and -find the "next" winner is to move some loser (let's say cell 30 in the -diagram above) into the 0 position, and then percolate this new 0 down -the tree, exchanging values, until the invariant is re-established. -This is clearly logarithmic on the total number of items in the tree. -By iterating over all items, you get an O(n ln n) sort. - -A nice feature of this sort is that you can efficiently insert new -items while the sort is going on, provided that the inserted items are -not "better" than the last 0'th element you extracted. This is -especially useful in simulation contexts, where the tree holds all -incoming events, and the "win" condition means the smallest scheduled -time. When an event schedule other events for execution, they are -scheduled into the future, so they can easily go into the heap. So, a -heap is a good structure for implementing schedulers (this is what I -used for my MIDI sequencer :-). - -Various structures for implementing schedulers have been extensively -studied, and heaps are good for this, as they are reasonably speedy, -the speed is almost constant, and the worst case is not much different -than the average case. However, there are other representations which -are more efficient overall, yet the worst cases might be terrible. - -Heaps are also very useful in big disk sorts. 
You most probably all -know that a big sort implies producing "runs" (which are pre-sorted -sequences, which size is usually related to the amount of CPU memory), -followed by a merging passes for these runs, which merging is often -very cleverly organised[1]. It is very important that the initial -sort produces the longest runs possible. Tournaments are a good way -to that. If, using all the memory available to hold a tournament, you -replace and percolate items that happen to fit the current run, you'll -produce runs which are twice the size of the memory for random input, -and much better for input fuzzily ordered. - -Moreover, if you output the 0'th item on disk and get an input which -may not fit in the current tournament (because the value "wins" over -the last output value), it cannot fit in the heap, so the size of the -heap decreases. The freed memory could be cleverly reused immediately -for progressively building a second heap, which grows at exactly the -same rate the first heap is melting. When the first heap completely -vanishes, you switch heaps and start a new run. Clever and quite -effective! - -In a word, heaps are useful memory structures to know. I use them in -a few applications, and I think it is good to keep a `heap' module -around. :-) - --------------------- -[1] The disk balancing algorithms which are current, nowadays, are -more annoying than clever, and this is a consequence of the seeking -capabilities of the disks. On devices which cannot seek, like big -tape drives, the story was quite different, and one had to be very -clever to ensure (far in advance) that each tape movement will be the -most effective possible (that is, will best participate at -"progressing" the merge). Some tapes were even able to read -backwards, and this was also used to avoid the rewinding time. -Believe me, real good tape sorts were quite spectacular to watch! -From all times, sorting has always been a Great Art! 
:-) -""" - -__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge', - 'nlargest', 'nsmallest', 'heappushpop'] - -from itertools import islice, count, imap, izip, tee, chain -from operator import itemgetter - -def cmp_lt(x, y): - # Use __lt__ if available; otherwise, try __le__. - # In Py3.x, only __lt__ will be called. - return (x < y) if hasattr(x, '__lt__') else (not y <= x) - -def heappush(heap, item): - """Push item onto heap, maintaining the heap invariant.""" - heap.append(item) - _siftdown(heap, 0, len(heap)-1) - -def heappop(heap): - """Pop the smallest item off the heap, maintaining the heap invariant.""" - lastelt = heap.pop() # raises appropriate IndexError if heap is empty - if heap: - returnitem = heap[0] - heap[0] = lastelt - _siftup(heap, 0) - else: - returnitem = lastelt - return returnitem - -def heapreplace(heap, item): - """Pop and return the current smallest value, and add the new item. - - This is more efficient than heappop() followed by heappush(), and can be - more appropriate when using a fixed-size heap. Note that the value - returned may be larger than item! That constrains reasonable uses of - this routine unless written as part of a conditional replacement: - - if item > heap[0]: - item = heapreplace(heap, item) - """ - returnitem = heap[0] # raises appropriate IndexError if heap is empty - heap[0] = item - _siftup(heap, 0) - return returnitem - -def heappushpop(heap, item): - """Fast version of a heappush followed by a heappop.""" - if heap and cmp_lt(heap[0], item): - item, heap[0] = heap[0], item - _siftup(heap, 0) - return item - -def heapify(x): - """Transform list into a heap, in-place, in O(len(x)) time.""" - n = len(x) - # Transform bottom-up. The largest index there's any point to looking at - # is the largest with a child index in-range, so must have 2*i + 1 < n, - # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so - # j-1 is the largest, which is n//2 - 1. 
If n is odd = 2*j+1, this is - # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1. - for i in reversed(xrange(n//2)): - _siftup(x, i) - -def _heappushpop_max(heap, item): - """Maxheap version of a heappush followed by a heappop.""" - if heap and cmp_lt(item, heap[0]): - item, heap[0] = heap[0], item - _siftup_max(heap, 0) - return item - -def _heapify_max(x): - """Transform list into a maxheap, in-place, in O(len(x)) time.""" - n = len(x) - for i in reversed(range(n//2)): - _siftup_max(x, i) - -def nlargest(n, iterable): - """Find the n largest elements in a dataset. - - Equivalent to: sorted(iterable, reverse=True)[:n] - """ - if n < 0: - return [] - it = iter(iterable) - result = list(islice(it, n)) - if not result: - return result - heapify(result) - _heappushpop = heappushpop - for elem in it: - _heappushpop(result, elem) - result.sort(reverse=True) - return result - -def nsmallest(n, iterable): - """Find the n smallest elements in a dataset. - - Equivalent to: sorted(iterable)[:n] - """ - if n < 0: - return [] - it = iter(iterable) - result = list(islice(it, n)) - if not result: - return result - _heapify_max(result) - _heappushpop = _heappushpop_max - for elem in it: - _heappushpop(result, elem) - result.sort() - return result - -# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos -# is the index of a leaf with a possibly out-of-order value. Restore the -# heap invariant. -def _siftdown(heap, startpos, pos): - newitem = heap[pos] - # Follow the path to the root, moving parents down until finding a place - # newitem fits. - while pos > startpos: - parentpos = (pos - 1) >> 1 - parent = heap[parentpos] - if cmp_lt(newitem, parent): - heap[pos] = parent - pos = parentpos - continue - break - heap[pos] = newitem - -# The child indices of heap index pos are already heaps, and we want to make -# a heap at index pos too. 
We do this by bubbling the smaller child of -# pos up (and so on with that child's children, etc) until hitting a leaf, -# then using _siftdown to move the oddball originally at index pos into place. -# -# We *could* break out of the loop as soon as we find a pos where newitem <= -# both its children, but turns out that's not a good idea, and despite that -# many books write the algorithm that way. During a heap pop, the last array -# element is sifted in, and that tends to be large, so that comparing it -# against values starting from the root usually doesn't pay (= usually doesn't -# get us out of the loop early). See Knuth, Volume 3, where this is -# explained and quantified in an exercise. -# -# Cutting the # of comparisons is important, since these routines have no -# way to extract "the priority" from an array element, so that intelligence -# is likely to be hiding in custom __cmp__ methods, or in array elements -# storing (priority, record) tuples. Comparisons are thus potentially -# expensive. -# -# On random arrays of length 1000, making this change cut the number of -# comparisons made by heapify() a little, and those made by exhaustive -# heappop() a lot, in accord with theory. Here are typical results from 3 -# runs (3 just to demonstrate how small the variance is): -# -# Compares needed by heapify Compares needed by 1000 heappops -# -------------------------- -------------------------------- -# 1837 cut to 1663 14996 cut to 8680 -# 1855 cut to 1659 14966 cut to 8678 -# 1847 cut to 1660 15024 cut to 8703 -# -# Building the heap by using heappush() 1000 times instead required -# 2198, 2148, and 2219 compares: heapify() is more efficient, when -# you can use it. -# -# The total compares needed by list.sort() on the same lists were 8627, -# 8627, and 8632 (this should be compared to the sum of heapify() and -# heappop() compares): list.sort() is (unsurprisingly!) more efficient -# for sorting. 
- -def _siftup(heap, pos): - endpos = len(heap) - startpos = pos - newitem = heap[pos] - # Bubble up the smaller child until hitting a leaf. - childpos = 2*pos + 1 # leftmost child position - while childpos < endpos: - # Set childpos to index of smaller child. - rightpos = childpos + 1 - if rightpos < endpos and not cmp_lt(heap[childpos], heap[rightpos]): - childpos = rightpos - # Move the smaller child up. - heap[pos] = heap[childpos] - pos = childpos - childpos = 2*pos + 1 - # The leaf at pos is empty now. Put newitem there, and bubble it up - # to its final resting place (by sifting its parents down). - heap[pos] = newitem - _siftdown(heap, startpos, pos) - -def _siftdown_max(heap, startpos, pos): - 'Maxheap variant of _siftdown' - newitem = heap[pos] - # Follow the path to the root, moving parents down until finding a place - # newitem fits. - while pos > startpos: - parentpos = (pos - 1) >> 1 - parent = heap[parentpos] - if cmp_lt(parent, newitem): - heap[pos] = parent - pos = parentpos - continue - break - heap[pos] = newitem - -def _siftup_max(heap, pos): - 'Maxheap variant of _siftup' - endpos = len(heap) - startpos = pos - newitem = heap[pos] - # Bubble up the larger child until hitting a leaf. - childpos = 2*pos + 1 # leftmost child position - while childpos < endpos: - # Set childpos to index of larger child. - rightpos = childpos + 1 - if rightpos < endpos and not cmp_lt(heap[rightpos], heap[childpos]): - childpos = rightpos - # Move the larger child up. - heap[pos] = heap[childpos] - pos = childpos - childpos = 2*pos + 1 - # The leaf at pos is empty now. Put newitem there, and bubble it up - # to its final resting place (by sifting its parents down). - heap[pos] = newitem - _siftdown_max(heap, startpos, pos) - -# If available, use C implementation -try: - from _heapq import * -except ImportError: - pass - -def merge(*iterables): - '''Merge multiple sorted inputs into a single sorted output. 
- - Similar to sorted(itertools.chain(*iterables)) but returns a generator, - does not pull the data into memory all at once, and assumes that each of - the input streams is already sorted (smallest to largest). - - >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) - [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] - - ''' - _heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration - _len = len - - h = [] - h_append = h.append - for itnum, it in enumerate(map(iter, iterables)): - try: - next = it.next - h_append([next(), itnum, next]) - except _StopIteration: - pass - heapify(h) - - while _len(h) > 1: - try: - while 1: - v, itnum, next = s = h[0] - yield v - s[0] = next() # raises StopIteration when exhausted - _heapreplace(h, s) # restore heap condition - except _StopIteration: - _heappop(h) # remove empty iterator - if h: - # fast case when only a single iterator remains - v, itnum, next = h[0] - yield v - for v in next.__self__: - yield v - -# Extend the implementations of nsmallest and nlargest to use a key= argument -_nsmallest = nsmallest -def nsmallest(n, iterable, key=None): - """Find the n smallest elements in a dataset. 
- - Equivalent to: sorted(iterable, key=key)[:n] - """ - # Short-cut for n==1 is to use min() when len(iterable)>0 - if n == 1: - it = iter(iterable) - head = list(islice(it, 1)) - if not head: - return [] - if key is None: - return [min(chain(head, it))] - return [min(chain(head, it), key=key)] - - # When n>=size, it's faster to use sorted() - try: - size = len(iterable) - except (TypeError, AttributeError): - pass - else: - if n >= size: - return sorted(iterable, key=key)[:n] - - # When key is none, use simpler decoration - if key is None: - it = izip(iterable, count()) # decorate - result = _nsmallest(n, it) - return map(itemgetter(0), result) # undecorate - - # General case, slowest method - in1, in2 = tee(iterable) - it = izip(imap(key, in1), count(), in2) # decorate - result = _nsmallest(n, it) - return map(itemgetter(2), result) # undecorate - -_nlargest = nlargest -def nlargest(n, iterable, key=None): - """Find the n largest elements in a dataset. - - Equivalent to: sorted(iterable, key=key, reverse=True)[:n] - """ - - # Short-cut for n==1 is to use max() when len(iterable)>0 - if n == 1: - it = iter(iterable) - head = list(islice(it, 1)) - if not head: - return [] - if key is None: - return [max(chain(head, it))] - return [max(chain(head, it), key=key)] - - # When n>=size, it's faster to use sorted() - try: - size = len(iterable) - except (TypeError, AttributeError): - pass - else: - if n >= size: - return sorted(iterable, key=key, reverse=True)[:n] - - # When key is none, use simpler decoration - if key is None: - it = izip(iterable, count(0,-1)) # decorate - result = _nlargest(n, it) - return map(itemgetter(0), result) # undecorate - - # General case, slowest method - in1, in2 = tee(iterable) - it = izip(imap(key, in1), count(0,-1), in2) # decorate - result = _nlargest(n, it) - return map(itemgetter(2), result) # undecorate - -if __name__ == "__main__": - # Simple sanity test - heap = [] - data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] - for item in data: - 
heappush(heap, item) - sort = [] - while heap: - sort.append(heappop(heap)) - print sort - - import doctest - doctest.testmod() diff --git a/python/Lib/hmac.py b/python/Lib/hmac.py deleted file mode 100755 index 9cd1a9fd91..0000000000 --- a/python/Lib/hmac.py +++ /dev/null @@ -1,136 +0,0 @@ -"""HMAC (Keyed-Hashing for Message Authentication) Python module. - -Implements the HMAC algorithm as described by RFC 2104. -""" - -import warnings as _warnings - -from operator import _compare_digest as compare_digest - - -trans_5C = "".join ([chr (x ^ 0x5C) for x in xrange(256)]) -trans_36 = "".join ([chr (x ^ 0x36) for x in xrange(256)]) - -# The size of the digests returned by HMAC depends on the underlying -# hashing module used. Use digest_size from the instance of HMAC instead. -digest_size = None - -# A unique object passed by HMAC.copy() to the HMAC constructor, in order -# that the latter return very quickly. HMAC("") in contrast is quite -# expensive. -_secret_backdoor_key = [] - -class HMAC: - """RFC 2104 HMAC class. Also complies with RFC 4231. - - This supports the API for Cryptographic Hash Functions (PEP 247). - """ - blocksize = 64 # 512-bit HMAC; can be changed in subclasses. - - def __init__(self, key, msg = None, digestmod = None): - """Create a new HMAC object. - - key: key for the keyed hash object. - msg: Initial input for the hash, if provided. - digestmod: A module supporting PEP 247. *OR* - A hashlib constructor returning a new hash object. - Defaults to hashlib.md5. 
- """ - - if key is _secret_backdoor_key: # cheap - return - - if digestmod is None: - import hashlib - digestmod = hashlib.md5 - - if hasattr(digestmod, '__call__'): - self.digest_cons = digestmod - else: - self.digest_cons = lambda d='': digestmod.new(d) - - self.outer = self.digest_cons() - self.inner = self.digest_cons() - self.digest_size = self.inner.digest_size - - if hasattr(self.inner, 'block_size'): - blocksize = self.inner.block_size - if blocksize < 16: - # Very low blocksize, most likely a legacy value like - # Lib/sha.py and Lib/md5.py have. - _warnings.warn('block_size of %d seems too small; using our ' - 'default of %d.' % (blocksize, self.blocksize), - RuntimeWarning, 2) - blocksize = self.blocksize - else: - _warnings.warn('No block_size attribute on given digest object; ' - 'Assuming %d.' % (self.blocksize), - RuntimeWarning, 2) - blocksize = self.blocksize - - if len(key) > blocksize: - key = self.digest_cons(key).digest() - - key = key + chr(0) * (blocksize - len(key)) - self.outer.update(key.translate(trans_5C)) - self.inner.update(key.translate(trans_36)) - if msg is not None: - self.update(msg) - -## def clear(self): -## raise NotImplementedError, "clear() method not available in HMAC." - - def update(self, msg): - """Update this hashing object with the string msg. - """ - self.inner.update(msg) - - def copy(self): - """Return a separate copy of this hashing object. - - An update to this copy won't affect the original object. - """ - other = self.__class__(_secret_backdoor_key) - other.digest_cons = self.digest_cons - other.digest_size = self.digest_size - other.inner = self.inner.copy() - other.outer = self.outer.copy() - return other - - def _current(self): - """Return a hash object for the current state. - - To be used only internally with digest() and hexdigest(). - """ - h = self.outer.copy() - h.update(self.inner.digest()) - return h - - def digest(self): - """Return the hash value of this hashing object. 
- - This returns a string containing 8-bit data. The object is - not altered in any way by this function; you can continue - updating the object after calling this function. - """ - h = self._current() - return h.digest() - - def hexdigest(self): - """Like digest(), but returns a string of hexadecimal digits instead. - """ - h = self._current() - return h.hexdigest() - -def new(key, msg = None, digestmod = None): - """Create a new hashing object and return it. - - key: The starting key for the hash. - msg: if available, will immediately be hashed into the object's starting - state. - - You can now feed arbitrary strings into the object using its update() - method, and can ask for the hash value at any time by calling its digest() - method. - """ - return HMAC(key, msg, digestmod) diff --git a/python/Lib/hotshot/__init__.py b/python/Lib/hotshot/__init__.py deleted file mode 100755 index 3692f93033..0000000000 --- a/python/Lib/hotshot/__init__.py +++ /dev/null @@ -1,78 +0,0 @@ -"""High-perfomance logging profiler, mostly written in C.""" - -import _hotshot -from _hotshot import ProfilerError - -from warnings import warnpy3k as _warnpy3k -_warnpy3k("The 'hotshot' module is not supported in 3.x, " - "use the 'profile' module instead.", stacklevel=2) - -class Profile: - def __init__(self, logfn, lineevents=0, linetimings=1): - self.lineevents = lineevents and 1 or 0 - self.linetimings = (linetimings and lineevents) and 1 or 0 - self._prof = p = _hotshot.profiler( - logfn, self.lineevents, self.linetimings) - - # Attempt to avoid confusing results caused by the presence of - # Python wrappers around these functions, but only if we can - # be sure the methods have not been overridden or extended. 
- if self.__class__ is Profile: - self.close = p.close - self.start = p.start - self.stop = p.stop - self.addinfo = p.addinfo - - def close(self): - """Close the logfile and terminate the profiler.""" - self._prof.close() - - def fileno(self): - """Return the file descriptor of the profiler's log file.""" - return self._prof.fileno() - - def start(self): - """Start the profiler.""" - self._prof.start() - - def stop(self): - """Stop the profiler.""" - self._prof.stop() - - def addinfo(self, key, value): - """Add an arbitrary labelled value to the profile log.""" - self._prof.addinfo(key, value) - - # These methods offer the same interface as the profile.Profile class, - # but delegate most of the work to the C implementation underneath. - - def run(self, cmd): - """Profile an exec-compatible string in the script - environment. - - The globals from the __main__ module are used as both the - globals and locals for the script. - """ - import __main__ - dict = __main__.__dict__ - return self.runctx(cmd, dict, dict) - - def runctx(self, cmd, globals, locals): - """Evaluate an exec-compatible string in a specific - environment. - - The string is compiled before profiling begins. - """ - code = compile(cmd, "", "exec") - self._prof.runcode(code, globals, locals) - return self - - def runcall(self, func, *args, **kw): - """Profile a single call of a callable. - - Additional positional and keyword arguments may be passed - along; the result of the call is returned, and exceptions are - allowed to propagate cleanly, while ensuring that profiling is - disabled on the way out. 
- """ - return self._prof.runcall(func, args, kw) diff --git a/python/Lib/hotshot/log.py b/python/Lib/hotshot/log.py deleted file mode 100755 index 17e8b50515..0000000000 --- a/python/Lib/hotshot/log.py +++ /dev/null @@ -1,194 +0,0 @@ -import _hotshot -import os.path -import parser -import symbol - -from _hotshot import \ - WHAT_ENTER, \ - WHAT_EXIT, \ - WHAT_LINENO, \ - WHAT_DEFINE_FILE, \ - WHAT_DEFINE_FUNC, \ - WHAT_ADD_INFO - - -__all__ = ["LogReader", "ENTER", "EXIT", "LINE"] - - -ENTER = WHAT_ENTER -EXIT = WHAT_EXIT -LINE = WHAT_LINENO - - -class LogReader: - def __init__(self, logfn): - # fileno -> filename - self._filemap = {} - # (fileno, lineno) -> filename, funcname - self._funcmap = {} - - self._reader = _hotshot.logreader(logfn) - self._nextitem = self._reader.next - self._info = self._reader.info - if 'current-directory' in self._info: - self.cwd = self._info['current-directory'] - else: - self.cwd = None - - # This mirrors the call stack of the profiled code as the log - # is read back in. It contains tuples of the form: - # - # (file name, line number of function def, function name) - # - self._stack = [] - self._append = self._stack.append - self._pop = self._stack.pop - - def close(self): - self._reader.close() - - def fileno(self): - """Return the file descriptor of the log reader's log file.""" - return self._reader.fileno() - - def addinfo(self, key, value): - """This method is called for each additional ADD_INFO record. - - This can be overridden by applications that want to receive - these events. The default implementation does not need to be - called by alternate implementations. - - The initial set of ADD_INFO records do not pass through this - mechanism; this is only needed to receive notification when - new values are added. Subclasses can inspect self._info after - calling LogReader.__init__(). 
- """ - pass - - def get_filename(self, fileno): - try: - return self._filemap[fileno] - except KeyError: - raise ValueError, "unknown fileno" - - def get_filenames(self): - return self._filemap.values() - - def get_fileno(self, filename): - filename = os.path.normcase(os.path.normpath(filename)) - for fileno, name in self._filemap.items(): - if name == filename: - return fileno - raise ValueError, "unknown filename" - - def get_funcname(self, fileno, lineno): - try: - return self._funcmap[(fileno, lineno)] - except KeyError: - raise ValueError, "unknown function location" - - # Iteration support: - # This adds an optional (& ignored) parameter to next() so that the - # same bound method can be used as the __getitem__() method -- this - # avoids using an additional method call which kills the performance. - - def next(self, index=0): - while 1: - # This call may raise StopIteration: - what, tdelta, fileno, lineno = self._nextitem() - - # handle the most common cases first - - if what == WHAT_ENTER: - filename, funcname = self._decode_location(fileno, lineno) - t = (filename, lineno, funcname) - self._append(t) - return what, t, tdelta - - if what == WHAT_EXIT: - try: - return what, self._pop(), tdelta - except IndexError: - raise StopIteration - - if what == WHAT_LINENO: - filename, firstlineno, funcname = self._stack[-1] - return what, (filename, lineno, funcname), tdelta - - if what == WHAT_DEFINE_FILE: - filename = os.path.normcase(os.path.normpath(tdelta)) - self._filemap[fileno] = filename - elif what == WHAT_DEFINE_FUNC: - filename = self._filemap[fileno] - self._funcmap[(fileno, lineno)] = (filename, tdelta) - elif what == WHAT_ADD_INFO: - # value already loaded into self.info; call the - # overridable addinfo() handler so higher-level code - # can pick up the new value - if tdelta == 'current-directory': - self.cwd = lineno - self.addinfo(tdelta, lineno) - else: - raise ValueError, "unknown event type" - - def __iter__(self): - return self - - # - # helpers 
- # - - def _decode_location(self, fileno, lineno): - try: - return self._funcmap[(fileno, lineno)] - except KeyError: - # - # This should only be needed when the log file does not - # contain all the DEFINE_FUNC records needed to allow the - # function name to be retrieved from the log file. - # - if self._loadfile(fileno): - filename = funcname = None - try: - filename, funcname = self._funcmap[(fileno, lineno)] - except KeyError: - filename = self._filemap.get(fileno) - funcname = None - self._funcmap[(fileno, lineno)] = (filename, funcname) - return filename, funcname - - def _loadfile(self, fileno): - try: - filename = self._filemap[fileno] - except KeyError: - print "Could not identify fileId", fileno - return 1 - if filename is None: - return 1 - absname = os.path.normcase(os.path.join(self.cwd, filename)) - - try: - fp = open(absname) - except IOError: - return - st = parser.suite(fp.read()) - fp.close() - - # Scan the tree looking for def and lambda nodes, filling in - # self._funcmap with all the available information. 
- funcdef = symbol.funcdef - lambdef = symbol.lambdef - - stack = [st.totuple(1)] - - while stack: - tree = stack.pop() - try: - sym = tree[0] - except (IndexError, TypeError): - continue - if sym == funcdef: - self._funcmap[(fileno, tree[2][2])] = filename, tree[2][1] - elif sym == lambdef: - self._funcmap[(fileno, tree[1][2])] = filename, "" - stack.extend(list(tree[1:])) diff --git a/python/Lib/hotshot/stats.py b/python/Lib/hotshot/stats.py deleted file mode 100755 index 7ff2277a18..0000000000 --- a/python/Lib/hotshot/stats.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Statistics analyzer for HotShot.""" - -import profile -import pstats - -import hotshot.log - -from hotshot.log import ENTER, EXIT - - -def load(filename): - return StatsLoader(filename).load() - - -class StatsLoader: - def __init__(self, logfn): - self._logfn = logfn - self._code = {} - self._stack = [] - self.pop_frame = self._stack.pop - - def load(self): - # The timer selected by the profiler should never be used, so make - # sure it doesn't work: - p = Profile() - p.get_time = _brokentimer - log = hotshot.log.LogReader(self._logfn) - taccum = 0 - for event in log: - what, (filename, lineno, funcname), tdelta = event - if tdelta > 0: - taccum += tdelta - - # We multiply taccum to convert from the microseconds we - # have to the seconds that the profile/pstats module work - # with; this allows the numbers to have some basis in - # reality (ignoring calibration issues for now). 
- - if what == ENTER: - frame = self.new_frame(filename, lineno, funcname) - p.trace_dispatch_call(frame, taccum * .000001) - taccum = 0 - - elif what == EXIT: - frame = self.pop_frame() - p.trace_dispatch_return(frame, taccum * .000001) - taccum = 0 - - # no further work for line events - - assert not self._stack - return pstats.Stats(p) - - def new_frame(self, *args): - # args must be filename, firstlineno, funcname - # our code objects are cached since we don't need to create - # new ones every time - try: - code = self._code[args] - except KeyError: - code = FakeCode(*args) - self._code[args] = code - # frame objects are create fresh, since the back pointer will - # vary considerably - if self._stack: - back = self._stack[-1] - else: - back = None - frame = FakeFrame(code, back) - self._stack.append(frame) - return frame - - -class Profile(profile.Profile): - def simulate_cmd_complete(self): - pass - - -class FakeCode: - def __init__(self, filename, firstlineno, funcname): - self.co_filename = filename - self.co_firstlineno = firstlineno - self.co_name = self.__name__ = funcname - - -class FakeFrame: - def __init__(self, code, back): - self.f_back = back - self.f_code = code - - -def _brokentimer(): - raise RuntimeError, "this timer should not be called" diff --git a/python/Lib/hotshot/stones.py b/python/Lib/hotshot/stones.py deleted file mode 100755 index fb88d581d3..0000000000 --- a/python/Lib/hotshot/stones.py +++ /dev/null @@ -1,30 +0,0 @@ -import errno -import hotshot -import hotshot.stats -import sys -import test.pystone - -def main(logfile): - p = hotshot.Profile(logfile) - benchtime, stones = p.runcall(test.pystone.pystones) - p.close() - - print "Pystone(%s) time for %d passes = %g" % \ - (test.pystone.__version__, test.pystone.LOOPS, benchtime) - print "This machine benchmarks at %g pystones/second" % stones - - stats = hotshot.stats.load(logfile) - stats.strip_dirs() - stats.sort_stats('time', 'calls') - try: - stats.print_stats(20) - except IOError, 
e: - if e.errno != errno.EPIPE: - raise - -if __name__ == '__main__': - if sys.argv[1:]: - main(sys.argv[1]) - else: - import tempfile - main(tempfile.NamedTemporaryFile().name) diff --git a/python/Lib/htmlentitydefs.py b/python/Lib/htmlentitydefs.py deleted file mode 100755 index 1f40d09d57..0000000000 --- a/python/Lib/htmlentitydefs.py +++ /dev/null @@ -1,273 +0,0 @@ -"""HTML character entity references.""" - -# maps the HTML entity name to the Unicode code point -name2codepoint = { - 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 - 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1 - 'Acirc': 0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1 - 'Agrave': 0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 - 'Alpha': 0x0391, # greek capital letter alpha, U+0391 - 'Aring': 0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 - 'Atilde': 0x00c3, # latin capital letter A with tilde, U+00C3 ISOlat1 - 'Auml': 0x00c4, # latin capital letter A with diaeresis, U+00C4 ISOlat1 - 'Beta': 0x0392, # greek capital letter beta, U+0392 - 'Ccedil': 0x00c7, # latin capital letter C with cedilla, U+00C7 ISOlat1 - 'Chi': 0x03a7, # greek capital letter chi, U+03A7 - 'Dagger': 0x2021, # double dagger, U+2021 ISOpub - 'Delta': 0x0394, # greek capital letter delta, U+0394 ISOgrk3 - 'ETH': 0x00d0, # latin capital letter ETH, U+00D0 ISOlat1 - 'Eacute': 0x00c9, # latin capital letter E with acute, U+00C9 ISOlat1 - 'Ecirc': 0x00ca, # latin capital letter E with circumflex, U+00CA ISOlat1 - 'Egrave': 0x00c8, # latin capital letter E with grave, U+00C8 ISOlat1 - 'Epsilon': 0x0395, # greek capital letter epsilon, U+0395 - 'Eta': 0x0397, # greek capital letter eta, U+0397 - 'Euml': 0x00cb, # latin capital letter E with diaeresis, U+00CB ISOlat1 - 'Gamma': 0x0393, # greek capital letter gamma, U+0393 ISOgrk3 - 'Iacute': 0x00cd, # latin capital 
letter I with acute, U+00CD ISOlat1 - 'Icirc': 0x00ce, # latin capital letter I with circumflex, U+00CE ISOlat1 - 'Igrave': 0x00cc, # latin capital letter I with grave, U+00CC ISOlat1 - 'Iota': 0x0399, # greek capital letter iota, U+0399 - 'Iuml': 0x00cf, # latin capital letter I with diaeresis, U+00CF ISOlat1 - 'Kappa': 0x039a, # greek capital letter kappa, U+039A - 'Lambda': 0x039b, # greek capital letter lambda, U+039B ISOgrk3 - 'Mu': 0x039c, # greek capital letter mu, U+039C - 'Ntilde': 0x00d1, # latin capital letter N with tilde, U+00D1 ISOlat1 - 'Nu': 0x039d, # greek capital letter nu, U+039D - 'OElig': 0x0152, # latin capital ligature OE, U+0152 ISOlat2 - 'Oacute': 0x00d3, # latin capital letter O with acute, U+00D3 ISOlat1 - 'Ocirc': 0x00d4, # latin capital letter O with circumflex, U+00D4 ISOlat1 - 'Ograve': 0x00d2, # latin capital letter O with grave, U+00D2 ISOlat1 - 'Omega': 0x03a9, # greek capital letter omega, U+03A9 ISOgrk3 - 'Omicron': 0x039f, # greek capital letter omicron, U+039F - 'Oslash': 0x00d8, # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 - 'Otilde': 0x00d5, # latin capital letter O with tilde, U+00D5 ISOlat1 - 'Ouml': 0x00d6, # latin capital letter O with diaeresis, U+00D6 ISOlat1 - 'Phi': 0x03a6, # greek capital letter phi, U+03A6 ISOgrk3 - 'Pi': 0x03a0, # greek capital letter pi, U+03A0 ISOgrk3 - 'Prime': 0x2033, # double prime = seconds = inches, U+2033 ISOtech - 'Psi': 0x03a8, # greek capital letter psi, U+03A8 ISOgrk3 - 'Rho': 0x03a1, # greek capital letter rho, U+03A1 - 'Scaron': 0x0160, # latin capital letter S with caron, U+0160 ISOlat2 - 'Sigma': 0x03a3, # greek capital letter sigma, U+03A3 ISOgrk3 - 'THORN': 0x00de, # latin capital letter THORN, U+00DE ISOlat1 - 'Tau': 0x03a4, # greek capital letter tau, U+03A4 - 'Theta': 0x0398, # greek capital letter theta, U+0398 ISOgrk3 - 'Uacute': 0x00da, # latin capital letter U with acute, U+00DA ISOlat1 - 'Ucirc': 0x00db, # latin capital letter U with 
circumflex, U+00DB ISOlat1 - 'Ugrave': 0x00d9, # latin capital letter U with grave, U+00D9 ISOlat1 - 'Upsilon': 0x03a5, # greek capital letter upsilon, U+03A5 ISOgrk3 - 'Uuml': 0x00dc, # latin capital letter U with diaeresis, U+00DC ISOlat1 - 'Xi': 0x039e, # greek capital letter xi, U+039E ISOgrk3 - 'Yacute': 0x00dd, # latin capital letter Y with acute, U+00DD ISOlat1 - 'Yuml': 0x0178, # latin capital letter Y with diaeresis, U+0178 ISOlat2 - 'Zeta': 0x0396, # greek capital letter zeta, U+0396 - 'aacute': 0x00e1, # latin small letter a with acute, U+00E1 ISOlat1 - 'acirc': 0x00e2, # latin small letter a with circumflex, U+00E2 ISOlat1 - 'acute': 0x00b4, # acute accent = spacing acute, U+00B4 ISOdia - 'aelig': 0x00e6, # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 - 'agrave': 0x00e0, # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 - 'alefsym': 0x2135, # alef symbol = first transfinite cardinal, U+2135 NEW - 'alpha': 0x03b1, # greek small letter alpha, U+03B1 ISOgrk3 - 'amp': 0x0026, # ampersand, U+0026 ISOnum - 'and': 0x2227, # logical and = wedge, U+2227 ISOtech - 'ang': 0x2220, # angle, U+2220 ISOamso - 'aring': 0x00e5, # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 - 'asymp': 0x2248, # almost equal to = asymptotic to, U+2248 ISOamsr - 'atilde': 0x00e3, # latin small letter a with tilde, U+00E3 ISOlat1 - 'auml': 0x00e4, # latin small letter a with diaeresis, U+00E4 ISOlat1 - 'bdquo': 0x201e, # double low-9 quotation mark, U+201E NEW - 'beta': 0x03b2, # greek small letter beta, U+03B2 ISOgrk3 - 'brvbar': 0x00a6, # broken bar = broken vertical bar, U+00A6 ISOnum - 'bull': 0x2022, # bullet = black small circle, U+2022 ISOpub - 'cap': 0x2229, # intersection = cap, U+2229 ISOtech - 'ccedil': 0x00e7, # latin small letter c with cedilla, U+00E7 ISOlat1 - 'cedil': 0x00b8, # cedilla = spacing cedilla, U+00B8 ISOdia - 'cent': 0x00a2, # cent sign, U+00A2 ISOnum - 'chi': 0x03c7, # greek 
small letter chi, U+03C7 ISOgrk3 - 'circ': 0x02c6, # modifier letter circumflex accent, U+02C6 ISOpub - 'clubs': 0x2663, # black club suit = shamrock, U+2663 ISOpub - 'cong': 0x2245, # approximately equal to, U+2245 ISOtech - 'copy': 0x00a9, # copyright sign, U+00A9 ISOnum - 'crarr': 0x21b5, # downwards arrow with corner leftwards = carriage return, U+21B5 NEW - 'cup': 0x222a, # union = cup, U+222A ISOtech - 'curren': 0x00a4, # currency sign, U+00A4 ISOnum - 'dArr': 0x21d3, # downwards double arrow, U+21D3 ISOamsa - 'dagger': 0x2020, # dagger, U+2020 ISOpub - 'darr': 0x2193, # downwards arrow, U+2193 ISOnum - 'deg': 0x00b0, # degree sign, U+00B0 ISOnum - 'delta': 0x03b4, # greek small letter delta, U+03B4 ISOgrk3 - 'diams': 0x2666, # black diamond suit, U+2666 ISOpub - 'divide': 0x00f7, # division sign, U+00F7 ISOnum - 'eacute': 0x00e9, # latin small letter e with acute, U+00E9 ISOlat1 - 'ecirc': 0x00ea, # latin small letter e with circumflex, U+00EA ISOlat1 - 'egrave': 0x00e8, # latin small letter e with grave, U+00E8 ISOlat1 - 'empty': 0x2205, # empty set = null set = diameter, U+2205 ISOamso - 'emsp': 0x2003, # em space, U+2003 ISOpub - 'ensp': 0x2002, # en space, U+2002 ISOpub - 'epsilon': 0x03b5, # greek small letter epsilon, U+03B5 ISOgrk3 - 'equiv': 0x2261, # identical to, U+2261 ISOtech - 'eta': 0x03b7, # greek small letter eta, U+03B7 ISOgrk3 - 'eth': 0x00f0, # latin small letter eth, U+00F0 ISOlat1 - 'euml': 0x00eb, # latin small letter e with diaeresis, U+00EB ISOlat1 - 'euro': 0x20ac, # euro sign, U+20AC NEW - 'exist': 0x2203, # there exists, U+2203 ISOtech - 'fnof': 0x0192, # latin small f with hook = function = florin, U+0192 ISOtech - 'forall': 0x2200, # for all, U+2200 ISOtech - 'frac12': 0x00bd, # vulgar fraction one half = fraction one half, U+00BD ISOnum - 'frac14': 0x00bc, # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum - 'frac34': 0x00be, # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum - 'frasl': 
0x2044, # fraction slash, U+2044 NEW - 'gamma': 0x03b3, # greek small letter gamma, U+03B3 ISOgrk3 - 'ge': 0x2265, # greater-than or equal to, U+2265 ISOtech - 'gt': 0x003e, # greater-than sign, U+003E ISOnum - 'hArr': 0x21d4, # left right double arrow, U+21D4 ISOamsa - 'harr': 0x2194, # left right arrow, U+2194 ISOamsa - 'hearts': 0x2665, # black heart suit = valentine, U+2665 ISOpub - 'hellip': 0x2026, # horizontal ellipsis = three dot leader, U+2026 ISOpub - 'iacute': 0x00ed, # latin small letter i with acute, U+00ED ISOlat1 - 'icirc': 0x00ee, # latin small letter i with circumflex, U+00EE ISOlat1 - 'iexcl': 0x00a1, # inverted exclamation mark, U+00A1 ISOnum - 'igrave': 0x00ec, # latin small letter i with grave, U+00EC ISOlat1 - 'image': 0x2111, # blackletter capital I = imaginary part, U+2111 ISOamso - 'infin': 0x221e, # infinity, U+221E ISOtech - 'int': 0x222b, # integral, U+222B ISOtech - 'iota': 0x03b9, # greek small letter iota, U+03B9 ISOgrk3 - 'iquest': 0x00bf, # inverted question mark = turned question mark, U+00BF ISOnum - 'isin': 0x2208, # element of, U+2208 ISOtech - 'iuml': 0x00ef, # latin small letter i with diaeresis, U+00EF ISOlat1 - 'kappa': 0x03ba, # greek small letter kappa, U+03BA ISOgrk3 - 'lArr': 0x21d0, # leftwards double arrow, U+21D0 ISOtech - 'lambda': 0x03bb, # greek small letter lambda, U+03BB ISOgrk3 - 'lang': 0x2329, # left-pointing angle bracket = bra, U+2329 ISOtech - 'laquo': 0x00ab, # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum - 'larr': 0x2190, # leftwards arrow, U+2190 ISOnum - 'lceil': 0x2308, # left ceiling = apl upstile, U+2308 ISOamsc - 'ldquo': 0x201c, # left double quotation mark, U+201C ISOnum - 'le': 0x2264, # less-than or equal to, U+2264 ISOtech - 'lfloor': 0x230a, # left floor = apl downstile, U+230A ISOamsc - 'lowast': 0x2217, # asterisk operator, U+2217 ISOtech - 'loz': 0x25ca, # lozenge, U+25CA ISOpub - 'lrm': 0x200e, # left-to-right mark, U+200E NEW RFC 2070 - 'lsaquo': 
0x2039, # single left-pointing angle quotation mark, U+2039 ISO proposed - 'lsquo': 0x2018, # left single quotation mark, U+2018 ISOnum - 'lt': 0x003c, # less-than sign, U+003C ISOnum - 'macr': 0x00af, # macron = spacing macron = overline = APL overbar, U+00AF ISOdia - 'mdash': 0x2014, # em dash, U+2014 ISOpub - 'micro': 0x00b5, # micro sign, U+00B5 ISOnum - 'middot': 0x00b7, # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum - 'minus': 0x2212, # minus sign, U+2212 ISOtech - 'mu': 0x03bc, # greek small letter mu, U+03BC ISOgrk3 - 'nabla': 0x2207, # nabla = backward difference, U+2207 ISOtech - 'nbsp': 0x00a0, # no-break space = non-breaking space, U+00A0 ISOnum - 'ndash': 0x2013, # en dash, U+2013 ISOpub - 'ne': 0x2260, # not equal to, U+2260 ISOtech - 'ni': 0x220b, # contains as member, U+220B ISOtech - 'not': 0x00ac, # not sign, U+00AC ISOnum - 'notin': 0x2209, # not an element of, U+2209 ISOtech - 'nsub': 0x2284, # not a subset of, U+2284 ISOamsn - 'ntilde': 0x00f1, # latin small letter n with tilde, U+00F1 ISOlat1 - 'nu': 0x03bd, # greek small letter nu, U+03BD ISOgrk3 - 'oacute': 0x00f3, # latin small letter o with acute, U+00F3 ISOlat1 - 'ocirc': 0x00f4, # latin small letter o with circumflex, U+00F4 ISOlat1 - 'oelig': 0x0153, # latin small ligature oe, U+0153 ISOlat2 - 'ograve': 0x00f2, # latin small letter o with grave, U+00F2 ISOlat1 - 'oline': 0x203e, # overline = spacing overscore, U+203E NEW - 'omega': 0x03c9, # greek small letter omega, U+03C9 ISOgrk3 - 'omicron': 0x03bf, # greek small letter omicron, U+03BF NEW - 'oplus': 0x2295, # circled plus = direct sum, U+2295 ISOamsb - 'or': 0x2228, # logical or = vee, U+2228 ISOtech - 'ordf': 0x00aa, # feminine ordinal indicator, U+00AA ISOnum - 'ordm': 0x00ba, # masculine ordinal indicator, U+00BA ISOnum - 'oslash': 0x00f8, # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 - 'otilde': 0x00f5, # latin small letter o with tilde, U+00F5 ISOlat1 - 'otimes': 0x2297, # 
circled times = vector product, U+2297 ISOamsb - 'ouml': 0x00f6, # latin small letter o with diaeresis, U+00F6 ISOlat1 - 'para': 0x00b6, # pilcrow sign = paragraph sign, U+00B6 ISOnum - 'part': 0x2202, # partial differential, U+2202 ISOtech - 'permil': 0x2030, # per mille sign, U+2030 ISOtech - 'perp': 0x22a5, # up tack = orthogonal to = perpendicular, U+22A5 ISOtech - 'phi': 0x03c6, # greek small letter phi, U+03C6 ISOgrk3 - 'pi': 0x03c0, # greek small letter pi, U+03C0 ISOgrk3 - 'piv': 0x03d6, # greek pi symbol, U+03D6 ISOgrk3 - 'plusmn': 0x00b1, # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum - 'pound': 0x00a3, # pound sign, U+00A3 ISOnum - 'prime': 0x2032, # prime = minutes = feet, U+2032 ISOtech - 'prod': 0x220f, # n-ary product = product sign, U+220F ISOamsb - 'prop': 0x221d, # proportional to, U+221D ISOtech - 'psi': 0x03c8, # greek small letter psi, U+03C8 ISOgrk3 - 'quot': 0x0022, # quotation mark = APL quote, U+0022 ISOnum - 'rArr': 0x21d2, # rightwards double arrow, U+21D2 ISOtech - 'radic': 0x221a, # square root = radical sign, U+221A ISOtech - 'rang': 0x232a, # right-pointing angle bracket = ket, U+232A ISOtech - 'raquo': 0x00bb, # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum - 'rarr': 0x2192, # rightwards arrow, U+2192 ISOnum - 'rceil': 0x2309, # right ceiling, U+2309 ISOamsc - 'rdquo': 0x201d, # right double quotation mark, U+201D ISOnum - 'real': 0x211c, # blackletter capital R = real part symbol, U+211C ISOamso - 'reg': 0x00ae, # registered sign = registered trade mark sign, U+00AE ISOnum - 'rfloor': 0x230b, # right floor, U+230B ISOamsc - 'rho': 0x03c1, # greek small letter rho, U+03C1 ISOgrk3 - 'rlm': 0x200f, # right-to-left mark, U+200F NEW RFC 2070 - 'rsaquo': 0x203a, # single right-pointing angle quotation mark, U+203A ISO proposed - 'rsquo': 0x2019, # right single quotation mark, U+2019 ISOnum - 'sbquo': 0x201a, # single low-9 quotation mark, U+201A NEW - 'scaron': 0x0161, # latin small letter s 
with caron, U+0161 ISOlat2 - 'sdot': 0x22c5, # dot operator, U+22C5 ISOamsb - 'sect': 0x00a7, # section sign, U+00A7 ISOnum - 'shy': 0x00ad, # soft hyphen = discretionary hyphen, U+00AD ISOnum - 'sigma': 0x03c3, # greek small letter sigma, U+03C3 ISOgrk3 - 'sigmaf': 0x03c2, # greek small letter final sigma, U+03C2 ISOgrk3 - 'sim': 0x223c, # tilde operator = varies with = similar to, U+223C ISOtech - 'spades': 0x2660, # black spade suit, U+2660 ISOpub - 'sub': 0x2282, # subset of, U+2282 ISOtech - 'sube': 0x2286, # subset of or equal to, U+2286 ISOtech - 'sum': 0x2211, # n-ary sumation, U+2211 ISOamsb - 'sup': 0x2283, # superset of, U+2283 ISOtech - 'sup1': 0x00b9, # superscript one = superscript digit one, U+00B9 ISOnum - 'sup2': 0x00b2, # superscript two = superscript digit two = squared, U+00B2 ISOnum - 'sup3': 0x00b3, # superscript three = superscript digit three = cubed, U+00B3 ISOnum - 'supe': 0x2287, # superset of or equal to, U+2287 ISOtech - 'szlig': 0x00df, # latin small letter sharp s = ess-zed, U+00DF ISOlat1 - 'tau': 0x03c4, # greek small letter tau, U+03C4 ISOgrk3 - 'there4': 0x2234, # therefore, U+2234 ISOtech - 'theta': 0x03b8, # greek small letter theta, U+03B8 ISOgrk3 - 'thetasym': 0x03d1, # greek small letter theta symbol, U+03D1 NEW - 'thinsp': 0x2009, # thin space, U+2009 ISOpub - 'thorn': 0x00fe, # latin small letter thorn with, U+00FE ISOlat1 - 'tilde': 0x02dc, # small tilde, U+02DC ISOdia - 'times': 0x00d7, # multiplication sign, U+00D7 ISOnum - 'trade': 0x2122, # trade mark sign, U+2122 ISOnum - 'uArr': 0x21d1, # upwards double arrow, U+21D1 ISOamsa - 'uacute': 0x00fa, # latin small letter u with acute, U+00FA ISOlat1 - 'uarr': 0x2191, # upwards arrow, U+2191 ISOnum - 'ucirc': 0x00fb, # latin small letter u with circumflex, U+00FB ISOlat1 - 'ugrave': 0x00f9, # latin small letter u with grave, U+00F9 ISOlat1 - 'uml': 0x00a8, # diaeresis = spacing diaeresis, U+00A8 ISOdia - 'upsih': 0x03d2, # greek upsilon with hook symbol, U+03D2 NEW - 
'upsilon': 0x03c5, # greek small letter upsilon, U+03C5 ISOgrk3 - 'uuml': 0x00fc, # latin small letter u with diaeresis, U+00FC ISOlat1 - 'weierp': 0x2118, # script capital P = power set = Weierstrass p, U+2118 ISOamso - 'xi': 0x03be, # greek small letter xi, U+03BE ISOgrk3 - 'yacute': 0x00fd, # latin small letter y with acute, U+00FD ISOlat1 - 'yen': 0x00a5, # yen sign = yuan sign, U+00A5 ISOnum - 'yuml': 0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1 - 'zeta': 0x03b6, # greek small letter zeta, U+03B6 ISOgrk3 - 'zwj': 0x200d, # zero width joiner, U+200D NEW RFC 2070 - 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070 -} - -# maps the Unicode code point to the HTML entity name -codepoint2name = {} - -# maps the HTML entity name to the character -# (or a character reference if the character is outside the Latin-1 range) -entitydefs = {} - -for (name, codepoint) in name2codepoint.iteritems(): - codepoint2name[codepoint] = name - if codepoint <= 0xff: - entitydefs[name] = chr(codepoint) - else: - entitydefs[name] = '&#%d;' % codepoint - -del name, codepoint diff --git a/python/Lib/htmllib.py b/python/Lib/htmllib.py deleted file mode 100755 index 44647dbf02..0000000000 --- a/python/Lib/htmllib.py +++ /dev/null @@ -1,491 +0,0 @@ -"""HTML 2.0 parser. - -See the HTML 2.0 specification: -http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html -""" - -from warnings import warnpy3k -warnpy3k("the htmllib module has been removed in Python 3.0", - stacklevel=2) -del warnpy3k - -import sgmllib - -from formatter import AS_IS - -__all__ = ["HTMLParser", "HTMLParseError"] - - -class HTMLParseError(sgmllib.SGMLParseError): - """Error raised when an HTML document can't be parsed.""" - - -class HTMLParser(sgmllib.SGMLParser): - """This is the basic HTML parser class. - - It supports all entity names required by the XHTML 1.0 Recommendation. - It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2 - elements. 
- - """ - - from htmlentitydefs import entitydefs - - def __init__(self, formatter, verbose=0): - """Creates an instance of the HTMLParser class. - - The formatter parameter is the formatter instance associated with - the parser. - - """ - sgmllib.SGMLParser.__init__(self, verbose) - self.formatter = formatter - - def error(self, message): - raise HTMLParseError(message) - - def reset(self): - sgmllib.SGMLParser.reset(self) - self.savedata = None - self.isindex = 0 - self.title = None - self.base = None - self.anchor = None - self.anchorlist = [] - self.nofill = 0 - self.list_stack = [] - - # ------ Methods used internally; some may be overridden - - # --- Formatter interface, taking care of 'savedata' mode; - # shouldn't need to be overridden - - def handle_data(self, data): - if self.savedata is not None: - self.savedata = self.savedata + data - else: - if self.nofill: - self.formatter.add_literal_data(data) - else: - self.formatter.add_flowing_data(data) - - # --- Hooks to save data; shouldn't need to be overridden - - def save_bgn(self): - """Begins saving character data in a buffer instead of sending it - to the formatter object. - - Retrieve the stored data via the save_end() method. Use of the - save_bgn() / save_end() pair may not be nested. - - """ - self.savedata = '' - - def save_end(self): - """Ends buffering character data and returns all data saved since - the preceding call to the save_bgn() method. - - If the nofill flag is false, whitespace is collapsed to single - spaces. A call to this method without a preceding call to the - save_bgn() method will raise a TypeError exception. - - """ - data = self.savedata - self.savedata = None - if not self.nofill: - data = ' '.join(data.split()) - return data - - # --- Hooks for anchors; should probably be overridden - - def anchor_bgn(self, href, name, type): - """This method is called at the start of an anchor region. - - The arguments correspond to the attributes of the tag with - the same names. 
The default implementation maintains a list of - hyperlinks (defined by the HREF attribute for tags) within - the document. The list of hyperlinks is available as the data - attribute anchorlist. - - """ - self.anchor = href - if self.anchor: - self.anchorlist.append(href) - - def anchor_end(self): - """This method is called at the end of an anchor region. - - The default implementation adds a textual footnote marker using an - index into the list of hyperlinks created by the anchor_bgn()method. - - """ - if self.anchor: - self.handle_data("[%d]" % len(self.anchorlist)) - self.anchor = None - - # --- Hook for images; should probably be overridden - - def handle_image(self, src, alt, *args): - """This method is called to handle images. - - The default implementation simply passes the alt value to the - handle_data() method. - - """ - self.handle_data(alt) - - # --------- Top level elememts - - def start_html(self, attrs): pass - def end_html(self): pass - - def start_head(self, attrs): pass - def end_head(self): pass - - def start_body(self, attrs): pass - def end_body(self): pass - - # ------ Head elements - - def start_title(self, attrs): - self.save_bgn() - - def end_title(self): - self.title = self.save_end() - - def do_base(self, attrs): - for a, v in attrs: - if a == 'href': - self.base = v - - def do_isindex(self, attrs): - self.isindex = 1 - - def do_link(self, attrs): - pass - - def do_meta(self, attrs): - pass - - def do_nextid(self, attrs): # Deprecated - pass - - # ------ Body elements - - # --- Headings - - def start_h1(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_font(('h1', 0, 1, 0)) - - def end_h1(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - - def start_h2(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_font(('h2', 0, 1, 0)) - - def end_h2(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - - def start_h3(self, attrs): - self.formatter.end_paragraph(1) - 
self.formatter.push_font(('h3', 0, 1, 0)) - - def end_h3(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - - def start_h4(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_font(('h4', 0, 1, 0)) - - def end_h4(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - - def start_h5(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_font(('h5', 0, 1, 0)) - - def end_h5(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - - def start_h6(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_font(('h6', 0, 1, 0)) - - def end_h6(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - - # --- Block Structuring Elements - - def do_p(self, attrs): - self.formatter.end_paragraph(1) - - def start_pre(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1)) - self.nofill = self.nofill + 1 - - def end_pre(self): - self.formatter.end_paragraph(1) - self.formatter.pop_font() - self.nofill = max(0, self.nofill - 1) - - def start_xmp(self, attrs): - self.start_pre(attrs) - self.setliteral('xmp') # Tell SGML parser - - def end_xmp(self): - self.end_pre() - - def start_listing(self, attrs): - self.start_pre(attrs) - self.setliteral('listing') # Tell SGML parser - - def end_listing(self): - self.end_pre() - - def start_address(self, attrs): - self.formatter.end_paragraph(0) - self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS)) - - def end_address(self): - self.formatter.end_paragraph(0) - self.formatter.pop_font() - - def start_blockquote(self, attrs): - self.formatter.end_paragraph(1) - self.formatter.push_margin('blockquote') - - def end_blockquote(self): - self.formatter.end_paragraph(1) - self.formatter.pop_margin() - - # --- List Elements - - def start_ul(self, attrs): - self.formatter.end_paragraph(not self.list_stack) - self.formatter.push_margin('ul') - self.list_stack.append(['ul', '*', 0]) - - def end_ul(self): - 
if self.list_stack: del self.list_stack[-1] - self.formatter.end_paragraph(not self.list_stack) - self.formatter.pop_margin() - - def do_li(self, attrs): - self.formatter.end_paragraph(0) - if self.list_stack: - [dummy, label, counter] = top = self.list_stack[-1] - top[2] = counter = counter+1 - else: - label, counter = '*', 0 - self.formatter.add_label_data(label, counter) - - def start_ol(self, attrs): - self.formatter.end_paragraph(not self.list_stack) - self.formatter.push_margin('ol') - label = '1.' - for a, v in attrs: - if a == 'type': - if len(v) == 1: v = v + '.' - label = v - self.list_stack.append(['ol', label, 0]) - - def end_ol(self): - if self.list_stack: del self.list_stack[-1] - self.formatter.end_paragraph(not self.list_stack) - self.formatter.pop_margin() - - def start_menu(self, attrs): - self.start_ul(attrs) - - def end_menu(self): - self.end_ul() - - def start_dir(self, attrs): - self.start_ul(attrs) - - def end_dir(self): - self.end_ul() - - def start_dl(self, attrs): - self.formatter.end_paragraph(1) - self.list_stack.append(['dl', '', 0]) - - def end_dl(self): - self.ddpop(1) - if self.list_stack: del self.list_stack[-1] - - def do_dt(self, attrs): - self.ddpop() - - def do_dd(self, attrs): - self.ddpop() - self.formatter.push_margin('dd') - self.list_stack.append(['dd', '', 0]) - - def ddpop(self, bl=0): - self.formatter.end_paragraph(bl) - if self.list_stack: - if self.list_stack[-1][0] == 'dd': - del self.list_stack[-1] - self.formatter.pop_margin() - - # --- Phrase Markup - - # Idiomatic Elements - - def start_cite(self, attrs): self.start_i(attrs) - def end_cite(self): self.end_i() - - def start_code(self, attrs): self.start_tt(attrs) - def end_code(self): self.end_tt() - - def start_em(self, attrs): self.start_i(attrs) - def end_em(self): self.end_i() - - def start_kbd(self, attrs): self.start_tt(attrs) - def end_kbd(self): self.end_tt() - - def start_samp(self, attrs): self.start_tt(attrs) - def end_samp(self): self.end_tt() - - def 
start_strong(self, attrs): self.start_b(attrs) - def end_strong(self): self.end_b() - - def start_var(self, attrs): self.start_i(attrs) - def end_var(self): self.end_i() - - # Typographic Elements - - def start_i(self, attrs): - self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS)) - def end_i(self): - self.formatter.pop_font() - - def start_b(self, attrs): - self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS)) - def end_b(self): - self.formatter.pop_font() - - def start_tt(self, attrs): - self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1)) - def end_tt(self): - self.formatter.pop_font() - - def start_a(self, attrs): - href = '' - name = '' - type = '' - for attrname, value in attrs: - value = value.strip() - if attrname == 'href': - href = value - if attrname == 'name': - name = value - if attrname == 'type': - type = value.lower() - self.anchor_bgn(href, name, type) - - def end_a(self): - self.anchor_end() - - # --- Line Break - - def do_br(self, attrs): - self.formatter.add_line_break() - - # --- Horizontal Rule - - def do_hr(self, attrs): - self.formatter.add_hor_rule() - - # --- Image - - def do_img(self, attrs): - align = '' - alt = '(image)' - ismap = '' - src = '' - width = 0 - height = 0 - for attrname, value in attrs: - if attrname == 'align': - align = value - if attrname == 'alt': - alt = value - if attrname == 'ismap': - ismap = value - if attrname == 'src': - src = value - if attrname == 'width': - try: width = int(value) - except ValueError: pass - if attrname == 'height': - try: height = int(value) - except ValueError: pass - self.handle_image(src, alt, ismap, align, width, height) - - # --- Really Old Unofficial Deprecated Stuff - - def do_plaintext(self, attrs): - self.start_pre(attrs) - self.setnomoretags() # Tell SGML parser - - # --- Unhandled tags - - def unknown_starttag(self, tag, attrs): - pass - - def unknown_endtag(self, tag): - pass - - -def test(args = None): - import sys, formatter - - if not args: - args = sys.argv[1:] - - silent = args and 
args[0] == '-s' - if silent: - del args[0] - - if args: - file = args[0] - else: - file = 'test.html' - - if file == '-': - f = sys.stdin - else: - try: - f = open(file, 'r') - except IOError, msg: - print file, ":", msg - sys.exit(1) - - data = f.read() - - if f is not sys.stdin: - f.close() - - if silent: - f = formatter.NullFormatter() - else: - f = formatter.AbstractFormatter(formatter.DumbWriter()) - - p = HTMLParser(f) - p.feed(data) - p.close() - - -if __name__ == '__main__': - test() diff --git a/python/Lib/httplib.py b/python/Lib/httplib.py deleted file mode 100755 index cc8ad8197a..0000000000 --- a/python/Lib/httplib.py +++ /dev/null @@ -1,1430 +0,0 @@ -r"""HTTP/1.1 client library - - - - -HTTPConnection goes through a number of "states", which define when a client -may legally make another request or fetch the response for a particular -request. This diagram details these state transitions: - - (null) - | - | HTTPConnection() - v - Idle - | - | putrequest() - v - Request-started - | - | ( putheader() )* endheaders() - v - Request-sent - | - | response = getresponse() - v - Unread-response [Response-headers-read] - |\____________________ - | | - | response.read() | putrequest() - v v - Idle Req-started-unread-response - ______/| - / | - response.read() | | ( putheader() )* endheaders() - v v - Request-started Req-sent-unread-response - | - | response.read() - v - Request-sent - -This diagram presents the following rules: - -- a second request may not be started until {response-headers-read} - -- a response [object] cannot be retrieved until {request-sent} - -- there is no differentiation between an unread response body and a - partially read response body - -Note: this enforcement is applied by the HTTPConnection class. The - HTTPResponse class does not enforce this state machine, which - implies sophisticated clients may accelerate the request/response - pipeline. 
Caution should be taken, though: accelerating the states - beyond the above pattern may imply knowledge of the server's - connection-close behavior for certain requests. For example, it - is impossible to tell whether the server will close the connection - UNTIL the response headers have been read; this means that further - requests cannot be placed into the pipeline until it is known that - the server will NOT be closing the connection. - -Logical State __state __response -------------- ------- ---------- -Idle _CS_IDLE None -Request-started _CS_REQ_STARTED None -Request-sent _CS_REQ_SENT None -Unread-response _CS_IDLE -Req-started-unread-response _CS_REQ_STARTED -Req-sent-unread-response _CS_REQ_SENT -""" - -from array import array -import os -import re -import socket -from sys import py3kwarning -from urlparse import urlsplit -import warnings -with warnings.catch_warnings(): - if py3kwarning: - warnings.filterwarnings("ignore", ".*mimetools has been removed", - DeprecationWarning) - import mimetools - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", - "HTTPException", "NotConnected", "UnknownProtocol", - "UnknownTransferEncoding", "UnimplementedFileMode", - "IncompleteRead", "InvalidURL", "ImproperConnectionState", - "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "error", "responses"] - -HTTP_PORT = 80 -HTTPS_PORT = 443 - -_UNKNOWN = 'UNKNOWN' - -# connection states -_CS_IDLE = 'Idle' -_CS_REQ_STARTED = 'Request-started' -_CS_REQ_SENT = 'Request-sent' - -# status codes -# informational -CONTINUE = 100 -SWITCHING_PROTOCOLS = 101 -PROCESSING = 102 - -# successful -OK = 200 -CREATED = 201 -ACCEPTED = 202 -NON_AUTHORITATIVE_INFORMATION = 203 -NO_CONTENT = 204 -RESET_CONTENT = 205 -PARTIAL_CONTENT = 206 -MULTI_STATUS = 207 -IM_USED = 226 - -# redirection -MULTIPLE_CHOICES = 300 -MOVED_PERMANENTLY = 301 -FOUND = 302 -SEE_OTHER = 303 
-NOT_MODIFIED = 304 -USE_PROXY = 305 -TEMPORARY_REDIRECT = 307 - -# client error -BAD_REQUEST = 400 -UNAUTHORIZED = 401 -PAYMENT_REQUIRED = 402 -FORBIDDEN = 403 -NOT_FOUND = 404 -METHOD_NOT_ALLOWED = 405 -NOT_ACCEPTABLE = 406 -PROXY_AUTHENTICATION_REQUIRED = 407 -REQUEST_TIMEOUT = 408 -CONFLICT = 409 -GONE = 410 -LENGTH_REQUIRED = 411 -PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 -REQUEST_URI_TOO_LONG = 414 -UNSUPPORTED_MEDIA_TYPE = 415 -REQUESTED_RANGE_NOT_SATISFIABLE = 416 -EXPECTATION_FAILED = 417 -UNPROCESSABLE_ENTITY = 422 -LOCKED = 423 -FAILED_DEPENDENCY = 424 -UPGRADE_REQUIRED = 426 - -# server error -INTERNAL_SERVER_ERROR = 500 -NOT_IMPLEMENTED = 501 -BAD_GATEWAY = 502 -SERVICE_UNAVAILABLE = 503 -GATEWAY_TIMEOUT = 504 -HTTP_VERSION_NOT_SUPPORTED = 505 -INSUFFICIENT_STORAGE = 507 -NOT_EXTENDED = 510 - -# Mapping status codes to official W3C names -responses = { - 100: 'Continue', - 101: 'Switching Protocols', - - 200: 'OK', - 201: 'Created', - 202: 'Accepted', - 203: 'Non-Authoritative Information', - 204: 'No Content', - 205: 'Reset Content', - 206: 'Partial Content', - - 300: 'Multiple Choices', - 301: 'Moved Permanently', - 302: 'Found', - 303: 'See Other', - 304: 'Not Modified', - 305: 'Use Proxy', - 306: '(Unused)', - 307: 'Temporary Redirect', - - 400: 'Bad Request', - 401: 'Unauthorized', - 402: 'Payment Required', - 403: 'Forbidden', - 404: 'Not Found', - 405: 'Method Not Allowed', - 406: 'Not Acceptable', - 407: 'Proxy Authentication Required', - 408: 'Request Timeout', - 409: 'Conflict', - 410: 'Gone', - 411: 'Length Required', - 412: 'Precondition Failed', - 413: 'Request Entity Too Large', - 414: 'Request-URI Too Long', - 415: 'Unsupported Media Type', - 416: 'Requested Range Not Satisfiable', - 417: 'Expectation Failed', - - 500: 'Internal Server Error', - 501: 'Not Implemented', - 502: 'Bad Gateway', - 503: 'Service Unavailable', - 504: 'Gateway Timeout', - 505: 'HTTP Version Not Supported', -} - -# maximal amount of data to read 
at one time in _safe_read -MAXAMOUNT = 1048576 - -# maximal line length when calling readline(). -_MAXLINE = 65536 - -# maximum amount of headers accepted -_MAXHEADERS = 100 - -# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) -# -# VCHAR = %x21-7E -# obs-text = %x80-FF -# header-field = field-name ":" OWS field-value OWS -# field-name = token -# field-value = *( field-content / obs-fold ) -# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] -# field-vchar = VCHAR / obs-text -# -# obs-fold = CRLF 1*( SP / HTAB ) -# ; obsolete line folding -# ; see Section 3.2.4 - -# token = 1*tchar -# -# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" -# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" -# / DIGIT / ALPHA -# ; any VCHAR, except delimiters -# -# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1 - -# the patterns for both name and value are more lenient than RFC -# definitions to allow for backwards compatibility -_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match -_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search - -# We always set the Content-Length header for these methods because some -# servers will otherwise respond with a 411 -_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'} - - -class HTTPMessage(mimetools.Message): - - def addheader(self, key, value): - """Add header for field key handling repeats.""" - prev = self.dict.get(key) - if prev is None: - self.dict[key] = value - else: - combined = ", ".join((prev, value)) - self.dict[key] = combined - - def addcontinue(self, key, more): - """Add more field data from a continuation line.""" - prev = self.dict[key] - self.dict[key] = prev + "\n " + more - - def readheaders(self): - """Read header lines. - - Read header lines up to the entirely blank line that terminates them. - The (normally blank) line that ends the headers is skipped, but not - included in the returned list. 
If an invalid line is found in the - header section, it is skipped, and further lines are processed. - - The variable self.status is set to the empty string if all went well, - otherwise it is an error message. The variable self.headers is a - completely uninterpreted list of lines contained in the header (so - printing them will reproduce the header exactly as it appears in the - file). - - If multiple header fields with the same name occur, they are combined - according to the rules in RFC 2616 sec 4.2: - - Appending each subsequent field-value to the first, each separated - by a comma. The order in which header fields with the same field-name - are received is significant to the interpretation of the combined - field value. - """ - # XXX The implementation overrides the readheaders() method of - # rfc822.Message. The base class design isn't amenable to - # customized behavior here so the method here is a copy of the - # base class code with a few small changes. - - self.dict = {} - self.unixfrom = '' - self.headers = hlist = [] - self.status = '' - headerseen = "" - firstline = 1 - tell = None - if not hasattr(self.fp, 'unread') and self.seekable: - tell = self.fp.tell - while True: - if len(hlist) > _MAXHEADERS: - raise HTTPException("got more than %d headers" % _MAXHEADERS) - if tell: - try: - tell() - except IOError: - tell = None - self.seekable = 0 - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if not line: - self.status = 'EOF in headers' - break - # Skip unix From name time lines - if firstline and line.startswith('From '): - self.unixfrom = self.unixfrom + line - continue - firstline = 0 - if headerseen and line[0] in ' \t': - # XXX Not sure if continuation lines are handled properly - # for http and/or for repeating headers - # It's a continuation line. - hlist.append(line) - self.addcontinue(headerseen, line.strip()) - continue - elif self.iscomment(line): - # It's a comment. Ignore it. 
- continue - elif self.islast(line): - # Note! No pushback here! The delimiter line gets eaten. - break - headerseen = self.isheader(line) - if headerseen: - # It's a legal header line, save it. - hlist.append(line) - self.addheader(headerseen, line[len(headerseen)+1:].strip()) - elif headerseen is not None: - # An empty header name. These aren't allowed in HTTP, but it's - # probably a benign mistake. Don't add the header, just keep - # going. - pass - else: - # It's not a header line; skip it and try the next line. - self.status = 'Non-header line where header expected' - -class HTTPResponse: - - # strict: If true, raise BadStatusLine if the status line can't be - # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is - # false because it prevents clients from talking to HTTP/0.9 - # servers. Note that a response with a sufficiently corrupted - # status line will look like an HTTP/0.9 response. - - # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. - - def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False): - if buffering: - # The caller won't be using any sock.recv() calls, so buffering - # is fine and recommended for performance. - self.fp = sock.makefile('rb') - else: - # The buffer size is specified as zero, because the headers of - # the response are read with readline(). If the reads were - # buffered the readline() calls could consume some of the - # response, which make be read via a recv() on the underlying - # socket. - self.fp = sock.makefile('rb', 0) - self.debuglevel = debuglevel - self.strict = strict - self._method = method - - self.msg = None - - # from the Status-Line of the response - self.version = _UNKNOWN # HTTP-Version - self.status = _UNKNOWN # Status-Code - self.reason = _UNKNOWN # Reason-Phrase - - self.chunked = _UNKNOWN # is "chunked" being used? 
- self.chunk_left = _UNKNOWN # bytes left to read in current chunk - self.length = _UNKNOWN # number of bytes left in response - self.will_close = _UNKNOWN # conn will close at end of response - - def _read_status(self): - # Initialize with Simple-Response defaults - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if self.debuglevel > 0: - print "reply:", repr(line) - if not line: - # Presumably, the server closed the connection before - # sending a valid response. - raise BadStatusLine(line) - try: - [version, status, reason] = line.split(None, 2) - except ValueError: - try: - [version, status] = line.split(None, 1) - reason = "" - except ValueError: - # empty version will cause next test to fail and status - # will be treated as 0.9 response. - version = "" - if not version.startswith('HTTP/'): - if self.strict: - self.close() - raise BadStatusLine(line) - else: - # assume it's a Simple-Response from an 0.9 server - self.fp = LineAndFileWrapper(line, self.fp) - return "HTTP/0.9", 200, "" - - # The status code is a three-digit number - try: - status = int(status) - if status < 100 or status > 999: - raise BadStatusLine(line) - except ValueError: - raise BadStatusLine(line) - return version, status, reason - - def begin(self): - if self.msg is not None: - # we've already started reading the response - return - - # read until we get a non-100 response - while True: - version, status, reason = self._read_status() - if status != CONTINUE: - break - # skip the header from the 100 response - while True: - skip = self.fp.readline(_MAXLINE + 1) - if len(skip) > _MAXLINE: - raise LineTooLong("header line") - skip = skip.strip() - if not skip: - break - if self.debuglevel > 0: - print "header:", skip - - self.status = status - self.reason = reason.strip() - if version == 'HTTP/1.0': - self.version = 10 - elif version.startswith('HTTP/1.'): - self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 - elif version == 
'HTTP/0.9': - self.version = 9 - else: - raise UnknownProtocol(version) - - if self.version == 9: - self.length = None - self.chunked = 0 - self.will_close = 1 - self.msg = HTTPMessage(StringIO()) - return - - self.msg = HTTPMessage(self.fp, 0) - if self.debuglevel > 0: - for hdr in self.msg.headers: - print "header:", hdr, - - # don't let the msg keep an fp - self.msg.fp = None - - # are we using the chunked-style of transfer encoding? - tr_enc = self.msg.getheader('transfer-encoding') - if tr_enc and tr_enc.lower() == "chunked": - self.chunked = 1 - self.chunk_left = None - else: - self.chunked = 0 - - # will the connection close at the end of the response? - self.will_close = self._check_close() - - # do we have a Content-Length? - # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" - length = self.msg.getheader('content-length') - if length and not self.chunked: - try: - self.length = int(length) - except ValueError: - self.length = None - else: - if self.length < 0: # ignore nonsensical negative lengths - self.length = None - else: - self.length = None - - # does the body have a fixed length? (of zero) - if (status == NO_CONTENT or status == NOT_MODIFIED or - 100 <= status < 200 or # 1xx codes - self._method == 'HEAD'): - self.length = 0 - - # if the connection remains open, and we aren't using chunked, and - # a content-length was not provided, then assume that the connection - # WILL close. - if not self.will_close and \ - not self.chunked and \ - self.length is None: - self.will_close = 1 - - def _check_close(self): - conn = self.msg.getheader('connection') - if self.version == 11: - # An HTTP/1.1 proxy is assumed to stay open unless - # explicitly closed. - conn = self.msg.getheader('connection') - if conn and "close" in conn.lower(): - return True - return False - - # Some HTTP/1.0 implementations have support for persistent - # connections, using rules different than HTTP/1.1. 
- - # For older HTTP, Keep-Alive indicates persistent connection. - if self.msg.getheader('keep-alive'): - return False - - # At least Akamai returns a "Connection: Keep-Alive" header, - # which was supposed to be sent by the client. - if conn and "keep-alive" in conn.lower(): - return False - - # Proxy-Connection is a netscape hack. - pconn = self.msg.getheader('proxy-connection') - if pconn and "keep-alive" in pconn.lower(): - return False - - # otherwise, assume it will close - return True - - def close(self): - fp = self.fp - if fp: - self.fp = None - fp.close() - - def isclosed(self): - # NOTE: it is possible that we will not ever call self.close(). This - # case occurs when will_close is TRUE, length is None, and we - # read up to the last byte, but NOT past it. - # - # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be - # called, meaning self.isclosed() is meaningful. - return self.fp is None - - # XXX It would be nice to have readline and __iter__ for this, too. - - def read(self, amt=None): - if self.fp is None: - return '' - - if self._method == 'HEAD': - self.close() - return '' - - if self.chunked: - return self._read_chunked(amt) - - if amt is None: - # unbounded read - if self.length is None: - s = self.fp.read() - else: - try: - s = self._safe_read(self.length) - except IncompleteRead: - self.close() - raise - self.length = 0 - self.close() # we read everything - return s - - if self.length is not None: - if amt > self.length: - # clip the read to the "end of response" - amt = self.length - - # we do not use _safe_read() here because this may be a .will_close - # connection, and the user is reading more bytes than will be provided - # (for example, reading in 1k chunks) - s = self.fp.read(amt) - if not s and amt: - # Ideally, we would raise IncompleteRead if the content-length - # wasn't satisfied, but it might break compatibility. 
- self.close() - if self.length is not None: - self.length -= len(s) - if not self.length: - self.close() - - return s - - def _read_chunked(self, amt): - assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - value = [] - while True: - if chunk_left is None: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("chunk size") - i = line.find(';') - if i >= 0: - line = line[:i] # strip chunk-extensions - try: - chunk_left = int(line, 16) - except ValueError: - # close the connection as protocol synchronisation is - # probably lost - self.close() - raise IncompleteRead(''.join(value)) - if chunk_left == 0: - break - if amt is None: - value.append(self._safe_read(chunk_left)) - elif amt < chunk_left: - value.append(self._safe_read(amt)) - self.chunk_left = chunk_left - amt - return ''.join(value) - elif amt == chunk_left: - value.append(self._safe_read(amt)) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return ''.join(value) - else: - value.append(self._safe_read(chunk_left)) - amt -= chunk_left - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - # read and discard trailer up to the CRLF terminator - ### note: we shouldn't have any trailers! - while True: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("trailer line") - if not line: - # a vanishingly small number of sites EOF without - # sending the trailer - break - if line == '\r\n': - break - - # we read everything; close the "file" - self.close() - - return ''.join(value) - - def _safe_read(self, amt): - """Read the number of bytes requested, compensating for partial reads. - - Normally, we have a blocking socket, but a read() can be interrupted - by a signal (resulting in a partial read). - - Note that we cannot distinguish between EOF and an interrupt when zero - bytes have been read. 
IncompleteRead() will be raised in this - situation. - - This function should be used when bytes "should" be present for - reading. If the bytes are truly not available (due to EOF), then the - IncompleteRead exception can be used to detect the problem. - """ - # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never - # return less than x bytes unless EOF is encountered. It now handles - # signal interruptions (socket.error EINTR) internally. This code - # never caught that exception anyways. It seems largely pointless. - # self.fp.read(amt) will work fine. - s = [] - while amt > 0: - chunk = self.fp.read(min(amt, MAXAMOUNT)) - if not chunk: - raise IncompleteRead(''.join(s), amt) - s.append(chunk) - amt -= len(chunk) - return ''.join(s) - - def fileno(self): - return self.fp.fileno() - - def getheader(self, name, default=None): - if self.msg is None: - raise ResponseNotReady() - return self.msg.getheader(name, default) - - def getheaders(self): - """Return list of (header, value) tuples.""" - if self.msg is None: - raise ResponseNotReady() - return self.msg.items() - - -class HTTPConnection: - - _http_vsn = 11 - _http_vsn_str = 'HTTP/1.1' - - response_class = HTTPResponse - default_port = HTTP_PORT - auto_open = 1 - debuglevel = 0 - strict = 0 - - def __init__(self, host, port=None, strict=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): - self.timeout = timeout - self.source_address = source_address - self.sock = None - self._buffer = [] - self.__response = None - self.__state = _CS_IDLE - self._method = None - self._tunnel_host = None - self._tunnel_port = None - self._tunnel_headers = {} - if strict is not None: - self.strict = strict - - (self.host, self.port) = self._get_hostport(host, port) - - # This is stored as an instance variable to allow unittests - # to replace with a suitable mock - self._create_connection = socket.create_connection - - def set_tunnel(self, host, port=None, headers=None): - """ Set up host and port 
for HTTP CONNECT tunnelling. - - In a connection that uses HTTP Connect tunneling, the host passed to the - constructor is used as proxy server that relays all communication to the - endpoint passed to set_tunnel. This is done by sending a HTTP CONNECT - request to the proxy server when the connection is established. - - This method must be called before the HTTP connection has been - established. - - The headers argument should be a mapping of extra HTTP headers - to send with the CONNECT request. - """ - # Verify if this is required. - if self.sock: - raise RuntimeError("Can't setup tunnel for established connection.") - - self._tunnel_host, self._tunnel_port = self._get_hostport(host, port) - if headers: - self._tunnel_headers = headers - else: - self._tunnel_headers.clear() - - def _get_hostport(self, host, port): - if port is None: - i = host.rfind(':') - j = host.rfind(']') # ipv6 addresses have [...] - if i > j: - try: - port = int(host[i+1:]) - except ValueError: - if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ - port = self.default_port - else: - raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) - host = host[:i] - else: - port = self.default_port - if host and host[0] == '[' and host[-1] == ']': - host = host[1:-1] - return (host, port) - - def set_debuglevel(self, level): - self.debuglevel = level - - def _tunnel(self): - self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host, - self._tunnel_port)) - for header, value in self._tunnel_headers.iteritems(): - self.send("%s: %s\r\n" % (header, value)) - self.send("\r\n") - response = self.response_class(self.sock, strict = self.strict, - method = self._method) - (version, code, message) = response._read_status() - - if version == "HTTP/0.9": - # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has - # concluded HTTP/0.9 is being used something has gone wrong. 
- self.close() - raise socket.error("Invalid response from tunnel request") - if code != 200: - self.close() - raise socket.error("Tunnel connection failed: %d %s" % (code, - message.strip())) - while True: - line = response.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if not line: - # for sites which EOF without sending trailer - break - if line == '\r\n': - break - - - def connect(self): - """Connect to the host and port specified in __init__.""" - self.sock = self._create_connection((self.host,self.port), - self.timeout, self.source_address) - - if self._tunnel_host: - self._tunnel() - - def close(self): - """Close the connection to the HTTP server.""" - self.__state = _CS_IDLE - try: - sock = self.sock - if sock: - self.sock = None - sock.close() # close it manually... there may be other refs - finally: - response = self.__response - if response: - self.__response = None - response.close() - - def send(self, data): - """Send `data' to the server.""" - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise NotConnected() - - if self.debuglevel > 0: - print "send:", repr(data) - blocksize = 8192 - if hasattr(data,'read') and not isinstance(data, array): - if self.debuglevel > 0: print "sendIng a read()able" - datablock = data.read(blocksize) - while datablock: - self.sock.sendall(datablock) - datablock = data.read(blocksize) - else: - self.sock.sendall(data) - - def _output(self, s): - """Add a line of output to the current request buffer. - - Assumes that the line does *not* end with \\r\\n. - """ - self._buffer.append(s) - - def _send_output(self, message_body=None): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - A message_body may be specified, to be appended to the request. 
- """ - self._buffer.extend(("", "")) - msg = "\r\n".join(self._buffer) - del self._buffer[:] - # If msg and message_body are sent in a single send() call, - # it will avoid performance problems caused by the interaction - # between delayed ack and the Nagle algorithm. - if isinstance(message_body, str): - msg += message_body - message_body = None - self.send(msg) - if message_body is not None: - #message_body was not a string (i.e. it is a file) and - #we must run the risk of Nagle - self.send(message_body) - - def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): - """Send a request to the server. - - `method' specifies an HTTP request method, e.g. 'GET'. - `url' specifies the object being requested, e.g. '/index.html'. - `skip_host' if True does not add automatically a 'Host:' header - `skip_accept_encoding' if True does not add automatically an - 'Accept-Encoding:' header - """ - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - - # in certain cases, we cannot issue another request on this connection. - # this occurs when: - # 1) we are in the process of sending a request. (_CS_REQ_STARTED) - # 2) a response to a previous request has signalled that it is going - # to close the connection upon completion. - # 3) the headers for the previous response have not been read, thus - # we cannot determine whether point (2) is true. (_CS_REQ_SENT) - # - # if there is no prior response, then we can request at will. - # - # if point (2) is true, then we will have passed the socket to the - # response (effectively meaning, "there is no prior response"), and - # will open a new one when a new request is made. - # - # Note: if a prior response exists, then we *can* start a new request. - # We are not allowed to begin fetching the response to this new - # request, however, until that prior response is complete. 
- # - if self.__state == _CS_IDLE: - self.__state = _CS_REQ_STARTED - else: - raise CannotSendRequest() - - # Save the method we use, we need it later in the response phase - self._method = method - if not url: - url = '/' - hdr = '%s %s %s' % (method, url, self._http_vsn_str) - - self._output(hdr) - - if self._http_vsn == 11: - # Issue some standard headers for better HTTP/1.1 compliance - - if not skip_host: - # this header is issued *only* for HTTP/1.1 - # connections. more specifically, this means it is - # only issued when the client uses the new - # HTTPConnection() class. backwards-compat clients - # will be using HTTP/1.0 and those clients may be - # issuing this header themselves. we should NOT issue - # it twice; some web servers (such as Apache) barf - # when they see two Host: headers - - # If we need a non-standard port,include it in the - # header. If the request is going through a proxy, - # but the host of the actual URL, not the host of the - # proxy. - - netloc = '' - if url.startswith('http'): - nil, netloc, nil, nil, nil = urlsplit(url) - - if netloc: - try: - netloc_enc = netloc.encode("ascii") - except UnicodeEncodeError: - netloc_enc = netloc.encode("idna") - self.putheader('Host', netloc_enc) - else: - if self._tunnel_host: - host = self._tunnel_host - port = self._tunnel_port - else: - host = self.host - port = self.port - - try: - host_enc = host.encode("ascii") - except UnicodeEncodeError: - host_enc = host.encode("idna") - # Wrap the IPv6 Host Header with [] (RFC 2732) - if host_enc.find(':') >= 0: - host_enc = "[" + host_enc + "]" - if port == self.default_port: - self.putheader('Host', host_enc) - else: - self.putheader('Host', "%s:%s" % (host_enc, port)) - - # note: we are assuming that clients will not attempt to set these - # headers since *this* library must deal with the - # consequences. 
this also means that when the supporting - # libraries are updated to recognize other forms, then this - # code should be changed (removed or updated). - - # we only want a Content-Encoding of "identity" since we don't - # support encodings such as x-gzip or x-deflate. - if not skip_accept_encoding: - self.putheader('Accept-Encoding', 'identity') - - # we can accept "chunked" Transfer-Encodings, but no others - # NOTE: no TE header implies *only* "chunked" - #self.putheader('TE', 'chunked') - - # if TE is supplied in the header, then it must appear in a - # Connection header. - #self.putheader('Connection', 'TE') - - else: - # For HTTP/1.0, the server will assume "not chunked" - pass - - def putheader(self, header, *values): - """Send a request header line to the server. - - For example: h.putheader('Accept', 'text/html') - """ - if self.__state != _CS_REQ_STARTED: - raise CannotSendHeader() - - header = '%s' % header - if not _is_legal_header_name(header): - raise ValueError('Invalid header name %r' % (header,)) - - values = [str(v) for v in values] - for one_value in values: - if _is_illegal_header_value(one_value): - raise ValueError('Invalid header value %r' % (one_value,)) - - hdr = '%s: %s' % (header, '\r\n\t'.join(values)) - self._output(hdr) - - def endheaders(self, message_body=None): - """Indicate that the last header line has been sent to the server. - - This method sends the request to the server. The optional - message_body argument can be used to pass a message body - associated with the request. The message body will be sent in - the same packet as the message headers if it is string, otherwise it is - sent as a separate packet. 
- """ - if self.__state == _CS_REQ_STARTED: - self.__state = _CS_REQ_SENT - else: - raise CannotSendHeader() - self._send_output(message_body) - - def request(self, method, url, body=None, headers={}): - """Send a complete request to the server.""" - self._send_request(method, url, body, headers) - - def _set_content_length(self, body, method): - # Set the content-length based on the body. If the body is "empty", we - # set Content-Length: 0 for methods that expect a body (RFC 7230, - # Section 3.3.2). If the body is set for other methods, we set the - # header provided we can figure out what the length is. - thelen = None - if body is None and method.upper() in _METHODS_EXPECTING_BODY: - thelen = '0' - elif body is not None: - try: - thelen = str(len(body)) - except (TypeError, AttributeError): - # If this is a file-like object, try to - # fstat its file descriptor - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print "Cannot stat!!" - - if thelen is not None: - self.putheader('Content-Length', thelen) - - def _send_request(self, method, url, body, headers): - # Honor explicitly requested Host: and Accept-Encoding: headers. - header_names = dict.fromkeys([k.lower() for k in headers]) - skips = {} - if 'host' in header_names: - skips['skip_host'] = 1 - if 'accept-encoding' in header_names: - skips['skip_accept_encoding'] = 1 - - self.putrequest(method, url, **skips) - - if 'content-length' not in header_names: - self._set_content_length(body, method) - for hdr, value in headers.iteritems(): - self.putheader(hdr, value) - self.endheaders(body) - - def getresponse(self, buffering=False): - "Get the response from the server." - - # if a prior response has been completed, then forget about it. 
- if self.__response and self.__response.isclosed(): - self.__response = None - - # - # if a prior response exists, then it must be completed (otherwise, we - # cannot read this response's header to determine the connection-close - # behavior) - # - # note: if a prior response existed, but was connection-close, then the - # socket and response were made independent of this HTTPConnection - # object since a new request requires that we open a whole new - # connection - # - # this means the prior response had one of two states: - # 1) will_close: this connection was reset and the prior socket and - # response operate independently - # 2) persistent: the response was retained and we await its - # isclosed() status to become true. - # - if self.__state != _CS_REQ_SENT or self.__response: - raise ResponseNotReady() - - args = (self.sock,) - kwds = {"strict":self.strict, "method":self._method} - if self.debuglevel > 0: - args += (self.debuglevel,) - if buffering: - #only add this keyword if non-default, for compatibility with - #other response_classes. - kwds["buffering"] = True; - response = self.response_class(*args, **kwds) - - try: - response.begin() - assert response.will_close != _UNKNOWN - self.__state = _CS_IDLE - - if response.will_close: - # this effectively passes the connection to the response - self.close() - else: - # remember this, so we can tell when it is complete - self.__response = response - - return response - except: - response.close() - raise - - -class HTTP: - "Compatibility class with httplib.py from 1.5." - - _http_vsn = 10 - _http_vsn_str = 'HTTP/1.0' - - debuglevel = 0 - - _connection_class = HTTPConnection - - def __init__(self, host='', port=None, strict=None): - "Provide a default host, since the superclass requires one." - - # some joker passed 0 explicitly, meaning default port - if port == 0: - port = None - - # Note that we may pass an empty string as the host; this will raise - # an error when we attempt to connect. 
Presumably, the client code - # will call connect before then, with a proper host. - self._setup(self._connection_class(host, port, strict)) - - def _setup(self, conn): - self._conn = conn - - # set up delegation to flesh out interface - self.send = conn.send - self.putrequest = conn.putrequest - self.putheader = conn.putheader - self.endheaders = conn.endheaders - self.set_debuglevel = conn.set_debuglevel - - conn._http_vsn = self._http_vsn - conn._http_vsn_str = self._http_vsn_str - - self.file = None - - def connect(self, host=None, port=None): - "Accept arguments to set the host/port, since the superclass doesn't." - - if host is not None: - (self._conn.host, self._conn.port) = self._conn._get_hostport(host, port) - self._conn.connect() - - def getfile(self): - "Provide a getfile, since the superclass' does not use this concept." - return self.file - - def getreply(self, buffering=False): - """Compat definition since superclass does not define it. - - Returns a tuple consisting of: - - server status code (e.g. '200' if all goes well) - - server "reason" corresponding to status code - - any RFC822 headers in the response from the server - """ - try: - if not buffering: - response = self._conn.getresponse() - else: - #only add this keyword if non-default for compatibility - #with other connection classes - response = self._conn.getresponse(buffering) - except BadStatusLine, e: - ### hmm. if getresponse() ever closes the socket on a bad request, - ### then we are going to have problems with self.sock - - ### should we keep this behavior? do people use it? 
- # keep the socket open (as a file), and return it - self.file = self._conn.sock.makefile('rb', 0) - - # close our socket -- we want to restart after any protocol error - self.close() - - self.headers = None - return -1, e.line, None - - self.headers = response.msg - self.file = response.fp - return response.status, response.reason, response.msg - - def close(self): - self._conn.close() - - # note that self.file == response.fp, which gets closed by the - # superclass. just clear the object ref here. - ### hmm. messy. if status==-1, then self.file is owned by us. - ### well... we aren't explicitly closing, but losing this ref will - ### do it - self.file = None - -try: - import ssl -except ImportError: - pass -else: - class HTTPSConnection(HTTPConnection): - "This class allows communication via SSL." - - default_port = HTTPS_PORT - - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, context=None): - HTTPConnection.__init__(self, host, port, strict, timeout, - source_address) - self.key_file = key_file - self.cert_file = cert_file - if context is None: - context = ssl._create_default_https_context() - if key_file or cert_file: - context.load_cert_chain(cert_file, key_file) - self._context = context - - def connect(self): - "Connect to a host on a given (SSL) port." - - HTTPConnection.connect(self) - - if self._tunnel_host: - server_hostname = self._tunnel_host - else: - server_hostname = self.host - - self.sock = self._context.wrap_socket(self.sock, - server_hostname=server_hostname) - - __all__.append("HTTPSConnection") - - class HTTPS(HTTP): - """Compatibility with 1.5 httplib interface - - Python 1.5.2 did not have an HTTPS class, but it defined an - interface for sending http requests that is also useful for - https. 
- """ - - _connection_class = HTTPSConnection - - def __init__(self, host='', port=None, key_file=None, cert_file=None, - strict=None, context=None): - # provide a default host, pass the X509 cert info - - # urf. compensate for bad input. - if port == 0: - port = None - self._setup(self._connection_class(host, port, key_file, - cert_file, strict, - context=context)) - - # we never actually use these for anything, but we keep them - # here for compatibility with post-1.5.2 CVS. - self.key_file = key_file - self.cert_file = cert_file - - - def FakeSocket (sock, sslobj): - warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + - "Use the result of ssl.wrap_socket() directly instead.", - DeprecationWarning, stacklevel=2) - return sslobj - - -class HTTPException(Exception): - # Subclasses that define an __init__ must call Exception.__init__ - # or define self.args. Otherwise, str() will fail. - pass - -class NotConnected(HTTPException): - pass - -class InvalidURL(HTTPException): - pass - -class UnknownProtocol(HTTPException): - def __init__(self, version): - self.args = version, - self.version = version - -class UnknownTransferEncoding(HTTPException): - pass - -class UnimplementedFileMode(HTTPException): - pass - -class IncompleteRead(HTTPException): - def __init__(self, partial, expected=None): - self.args = partial, - self.partial = partial - self.expected = expected - def __repr__(self): - if self.expected is not None: - e = ', %i more expected' % self.expected - else: - e = '' - return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) - def __str__(self): - return repr(self) - -class ImproperConnectionState(HTTPException): - pass - -class CannotSendRequest(ImproperConnectionState): - pass - -class CannotSendHeader(ImproperConnectionState): - pass - -class ResponseNotReady(ImproperConnectionState): - pass - -class BadStatusLine(HTTPException): - def __init__(self, line): - if not line: - line = repr(line) - self.args = line, - self.line = line - 
-class LineTooLong(HTTPException): - def __init__(self, line_type): - HTTPException.__init__(self, "got more than %d bytes when reading %s" - % (_MAXLINE, line_type)) - -# for backwards compatibility -error = HTTPException - -class LineAndFileWrapper: - """A limited file-like object for HTTP/0.9 responses.""" - - # The status-line parsing code calls readline(), which normally - # get the HTTP status line. For a 0.9 response, however, this is - # actually the first line of the body! Clients need to get a - # readable file object that contains that line. - - def __init__(self, line, file): - self._line = line - self._file = file - self._line_consumed = 0 - self._line_offset = 0 - self._line_left = len(line) - - def __getattr__(self, attr): - return getattr(self._file, attr) - - def _done(self): - # called when the last byte is read from the line. After the - # call, all read methods are delegated to the underlying file - # object. - self._line_consumed = 1 - self.read = self._file.read - self.readline = self._file.readline - self.readlines = self._file.readlines - - def read(self, amt=None): - if self._line_consumed: - return self._file.read(amt) - assert self._line_left - if amt is None or amt > self._line_left: - s = self._line[self._line_offset:] - self._done() - if amt is None: - return s + self._file.read() - else: - return s + self._file.read(amt - len(s)) - else: - assert amt <= self._line_left - i = self._line_offset - j = i + amt - s = self._line[i:j] - self._line_offset = j - self._line_left -= amt - if self._line_left == 0: - self._done() - return s - - def readline(self): - if self._line_consumed: - return self._file.readline() - assert self._line_left - s = self._line[self._line_offset:] - self._done() - return s - - def readlines(self, size=None): - if self._line_consumed: - return self._file.readlines(size) - assert self._line_left - L = [self._line[self._line_offset:]] - self._done() - if size is None: - return L + self._file.readlines() - else: - 
return L + self._file.readlines(size) diff --git a/python/Lib/idlelib/idle_test/README.txt b/python/Lib/idlelib/idle_test/README.txt deleted file mode 100644 index 6967d705d2..0000000000 --- a/python/Lib/idlelib/idle_test/README.txt +++ /dev/null @@ -1,150 +0,0 @@ -README FOR IDLE TESTS IN IDLELIB.IDLE_TEST - -0. Quick Start - -Automated unit tests were added in 2.7 for Python 2.x and 3.3 for Python 3.x. -To run the tests from a command line: - -python -m test.test_idle - -Human-mediated tests were added later in 2.7 and in 3.4. - -python -m idlelib.idle_test.htest - - -1. Test Files - -The idle directory, idlelib, has over 60 xyz.py files. The idle_test -subdirectory should contain a test_xyz.py for each, where 'xyz' is lowercased -even if xyz.py is not. Here is a possible template, with the blanks after -'.' and 'as', and before and after '_' to be filled in. - -import unittest -from test.support import requires -import idlelib. as - -class _Test(unittest.TestCase): - - def test_(self): - -if __name__ == '__main__': - unittest.main(verbosity=2) - -Add the following at the end of xyy.py, with the appropriate name added after -'test_'. Some files already have something like this for htest. If so, insert -the import and unittest.main lines before the htest lines. - -if __name__ == "__main__": - import unittest - unittest.main('idlelib.idle_test.test_', verbosity=2, exit=False) - - - -2. GUI Tests - -When run as part of the Python test suite, Idle GUI tests need to run -test.test_support.requires('gui') (test.support in 3.x). A test is a GUI test -if it creates a Tk root or master object either directly or indirectly by -instantiating a tkinter or idle class. For the benefit of test processes that -either have no graphical environment available or are not allowed to use it, GUI -tests must be 'guarded' by "requires('gui')" in a setUp function or method. -This will typically be setUpClass. 
- -To avoid interfering with other GUI tests, all GUI objects must be destroyed and -deleted by the end of the test. The Tk root created in a setUpX function should -be destroyed in the corresponding tearDownX and the module or class attribute -deleted. Others widgets should descend from the single root and the attributes -deleted BEFORE root is destroyed. See https://bugs.python.org/issue20567. - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = tk.Tk() - cls.text = tk.Text(root) - - @classmethod - def tearDownClass(cls): - del cls.text - cls.root.destroy() - del cls.root - -WARNING: In 2.7, "requires('gui') MUST NOT be called at module scope. -See https://bugs.python.org/issue18910 - -Requires('gui') causes the test(s) it guards to be skipped if any of -these conditions are met: - - - The tests are being run by regrtest.py, and it was started without enabling - the "gui" resource with the "-u" command line option. - - - The tests are being run on Windows by a service that is not allowed to - interact with the graphical environment. - - - The tests are being run on Linux and X Windows is not available. - - - The tests are being run on Mac OSX in a process that cannot make a window - manager connection. - - - tkinter.Tk cannot be successfully instantiated for some reason. - - - test.support.use_resources has been set by something other than - regrtest.py and does not contain "gui". - -Tests of non-GUI operations should avoid creating tk widgets. Incidental uses of -tk variables and messageboxes can be replaced by the mock classes in -idle_test/mock_tk.py. The mock text handles some uses of the tk Text widget. - - -3. Running Unit Tests - -Assume that xyz.py and test_xyz.py both end with a unittest.main() call. -Running either from an Idle editor runs all tests in the test_xyz file with the -version of Python running Idle. Test output appears in the Shell window. 
The -'verbosity=2' option lists all test methods in the file, which is appropriate -when developing tests. The 'exit=False' option is needed in xyx.py files when an -htest follows. - -The following command lines also run all test methods, including -GUI tests, in test_xyz.py. (Both '-m idlelib' and '-m idlelib.idle' start -Idle and so cannot run tests.) - -python -m idlelib.xyz -python -m idlelib.idle_test.test_xyz - -The following runs all idle_test/test_*.py tests interactively. - ->>> import unittest ->>> unittest.main('idlelib.idle_test', verbosity=2) - -The following run all Idle tests at a command line. Option '-v' is the same as -'verbosity=2'. (For 2.7, replace 'test' in the second line with -'test.regrtest'.) - -python -m unittest -v idlelib.idle_test -python -m test -v -ugui test_idle -python -m test.test_idle - -The idle tests are 'discovered' by idlelib.idle_test.__init__.load_tests, -which is also imported into test.test_idle. Normally, neither file should be -changed when working on individual test modules. The third command runs -unittest indirectly through regrtest. The same happens when the entire test -suite is run with 'python -m test'. So that command must work for buildbots -to stay green. Idle tests must not disturb the environment in a way that -makes other tests fail (issue 18081). - -To run an individual Testcase or test method, extend the dotted name given to -unittest on the command line. - -python -m unittest -v idlelib.idle_test.test_xyz.Test_case.test_meth - - -4. Human-mediated Tests - -Human-mediated tests are widget tests that cannot be automated but need human -verification. They are contained in idlelib/idle_test/htest.py, which has -instructions. (Some modules need an auxiliary function, identified with # htest -# on the header line.) The set is about complete, though some tests need -improvement. 
To run all htests, run the htest file from an editor or from the -command line with: - -python -m idlelib.idle_test.htest diff --git a/python/Lib/idlelib/idle_test/__init__.py b/python/Lib/idlelib/idle_test/__init__.py deleted file mode 100644 index 845c92d372..0000000000 --- a/python/Lib/idlelib/idle_test/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -'''idlelib.idle_test is a private implementation of test.test_idle, -which tests the IDLE application as part of the stdlib test suite. -Run IDLE tests alone with "python -m test.test_idle". -This package and its contained modules are subject to change and -any direct use is at your own risk. -''' -from os.path import dirname - -def load_tests(loader, standard_tests, pattern): - this_dir = dirname(__file__) - top_dir = dirname(dirname(this_dir)) - package_tests = loader.discover(start_dir=this_dir, pattern='test*.py', - top_level_dir=top_dir) - standard_tests.addTests(package_tests) - return standard_tests diff --git a/python/Lib/idlelib/idle_test/htest.py b/python/Lib/idlelib/idle_test/htest.py deleted file mode 100644 index f34140921c..0000000000 --- a/python/Lib/idlelib/idle_test/htest.py +++ /dev/null @@ -1,403 +0,0 @@ -'''Run human tests of Idle's window, dialog, and popup widgets. - -run(*tests) -Create a master Tk window. Within that, run each callable in tests -after finding the matching test spec in this file. If tests is empty, -run an htest for each spec dict in this file after finding the matching -callable in the module named in the spec. Close the window to skip or -end the test. - -In a tested module, let X be a global name bound to a callable (class -or function) whose .__name__ attrubute is also X (the usual situation). -The first parameter of X must be 'parent'. When called, the parent -argument will be the root window. X must create a child Toplevel -window (or subclass thereof). The Toplevel may be a test widget or -dialog, in which case the callable is the corresonding class. 
Or the -Toplevel may contain the widget to be tested or set up a context in -which a test widget is invoked. In this latter case, the callable is a -wrapper function that sets up the Toplevel and other objects. Wrapper -function names, such as _editor_window', should start with '_'. - - -End the module with - -if __name__ == '__main__': - - from idlelib.idle_test.htest import run - run(X) - -To have wrapper functions and test invocation code ignored by coveragepy -reports, put '# htest #' on the def statement header line. - -def _wrapper(parent): # htest # - -Also make sure that the 'if __name__' line matches the above. Then have -make sure that .coveragerc includes the following. - -[report] -exclude_lines = - .*# htest # - if __name__ == .__main__.: - -(The "." instead of "'" is intentional and necessary.) - - -To run any X, this file must contain a matching instance of the -following template, with X.__name__ prepended to '_spec'. -When all tests are run, the prefix is use to get X. - -_spec = { - 'file': '', - 'kwds': {'title': ''}, - 'msg': "" - } - -file (no .py): run() imports file.py. -kwds: augmented with {'parent':root} and passed to X as **kwds. -title: an example kwd; some widgets need this, delete if not. -msg: master window hints about testing the widget. - - -Modules and classes not being tested at the moment: -PyShell.PyShellEditorWindow -Debugger.Debugger -AutoCompleteWindow.AutoCompleteWindow -OutputWindow.OutputWindow (indirectly being tested with grep test) -''' - -from importlib import import_module -from idlelib.macosxSupport import _initializeTkVariantTests -import Tkinter as tk - -AboutDialog_spec = { - 'file': 'aboutDialog', - 'kwds': {'title': 'aboutDialog test', - '_htest': True, - }, - 'msg': "Test every button. 
Ensure Python, TK and IDLE versions " - "are correctly displayed.\n [Close] to exit.", - } - -_calltip_window_spec = { - 'file': 'CallTipWindow', - 'kwds': {}, - 'msg': "Typing '(' should display a calltip.\n" - "Typing ') should hide the calltip.\n" - } - -_class_browser_spec = { - 'file': 'ClassBrowser', - 'kwds': {}, - 'msg': "Inspect names of module, class(with superclass if " - "applicable), methods and functions.\nToggle nested items.\n" - "Double clicking on items prints a traceback for an exception " - "that is ignored." - } - -_color_delegator_spec = { - 'file': 'ColorDelegator', - 'kwds': {}, - 'msg': "The text is sample Python code.\n" - "Ensure components like comments, keywords, builtins,\n" - "string, definitions, and break are correctly colored.\n" - "The default color scheme is in idlelib/config-highlight.def" - } - -ConfigDialog_spec = { - 'file': 'configDialog', - 'kwds': {'title': 'ConfigDialogTest', - '_htest': True,}, - 'msg': "IDLE preferences dialog.\n" - "In the 'Fonts/Tabs' tab, changing font face, should update the " - "font face of the text in the area below it.\nIn the " - "'Highlighting' tab, try different color schemes. Clicking " - "items in the sample program should update the choices above it." - "\nIn the 'Keys', 'General' and 'Extensions' tabs, test settings" - "of interest." - "\n[Ok] to close the dialog.[Apply] to apply the settings and " - "and [Cancel] to revert all changes.\nRe-run the test to ensure " - "changes made have persisted." - } - -# TODO Improve message -_dyn_option_menu_spec = { - 'file': 'dynOptionMenuWidget', - 'kwds': {}, - 'msg': "Select one of the many options in the 'old option set'.\n" - "Click the button to change the option set.\n" - "Select one of the many options in the 'new option set'." - } - -# TODO edit wrapper -_editor_window_spec = { - 'file': 'EditorWindow', - 'kwds': {}, - 'msg': "Test editor functions of interest.\n" - "Best to close editor first." 
- } - -GetCfgSectionNameDialog_spec = { - 'file': 'configSectionNameDialog', - 'kwds': {'title':'Get Name', - 'message':'Enter something', - 'used_names': {'abc'}, - '_htest': True}, - 'msg': "After the text entered with [Ok] is stripped, , " - "'abc', or more that 30 chars are errors.\n" - "Close 'Get Name' with a valid entry (printed to Shell), " - "[Cancel], or [X]", - } - -GetHelpSourceDialog_spec = { - 'file': 'configHelpSourceEdit', - 'kwds': {'title': 'Get helpsource', - '_htest': True}, - 'msg': "Enter menu item name and help file path\n " - " and more than 30 chars are invalid menu item names.\n" - ", file does not exist are invalid path items.\n" - "Test for incomplete web address for help file path.\n" - "A valid entry will be printed to shell with [0k].\n" - "[Cancel] will print None to shell", - } - -# Update once issue21519 is resolved. -GetKeysDialog_spec = { - 'file': 'keybindingDialog', - 'kwds': {'title': 'Test keybindings', - 'action': 'find-again', - 'currentKeySequences': [''] , - '_htest': True, - }, - 'msg': "Test for different key modifier sequences.\n" - " is invalid.\n" - "No modifier key is invalid.\n" - "Shift key with [a-z],[0-9], function key, move key, tab, space" - "is invalid.\nNo validitity checking if advanced key binding " - "entry is used." - } - -_grep_dialog_spec = { - 'file': 'GrepDialog', - 'kwds': {}, - 'msg': "Click the 'Show GrepDialog' button.\n" - "Test the various 'Find-in-files' functions.\n" - "The results should be displayed in a new '*Output*' window.\n" - "'Right-click'->'Goto file/line' anywhere in the search results " - "should open that file \nin a new EditorWindow." - } - -_io_binding_spec = { - 'file': 'IOBinding', - 'kwds': {}, - 'msg': "Test the following bindings.\n" - " to open file from dialog.\n" - "Edit the file.\n" - " to print the file.\n" - " to save the file.\n" - " to save-as another file.\n" - " to save-copy-as another file.\n" - "Check that changes were saved by opening the file elsewhere." 
- } - -_multi_call_spec = { - 'file': 'MultiCall', - 'kwds': {}, - 'msg': "The following actions should trigger a print to console or IDLE" - " Shell.\nEntering and leaving the text area, key entry, " - ",\n, , " - ", \n, and " - "focusing out of the window\nare sequences to be tested." - } - -_multistatus_bar_spec = { - 'file': 'MultiStatusBar', - 'kwds': {}, - 'msg': "Ensure presence of multi-status bar below text area.\n" - "Click 'Update Status' to change the multi-status text" - } - -_object_browser_spec = { - 'file': 'ObjectBrowser', - 'kwds': {}, - 'msg': "Double click on items upto the lowest level.\n" - "Attributes of the objects and related information " - "will be displayed side-by-side at each level." - } - -_path_browser_spec = { - 'file': 'PathBrowser', - 'kwds': {}, - 'msg': "Test for correct display of all paths in sys.path.\n" - "Toggle nested items upto the lowest level.\n" - "Double clicking on an item prints a traceback\n" - "for an exception that is ignored." - } - -_percolator_spec = { - 'file': 'Percolator', - 'kwds': {}, - 'msg': "There are two tracers which can be toggled using a checkbox.\n" - "Toggling a tracer 'on' by checking it should print tracer" - "output to the console or to the IDLE shell.\n" - "If both the tracers are 'on', the output from the tracer which " - "was switched 'on' later, should be printed first\n" - "Test for actions like text entry, and removal." - } - -_replace_dialog_spec = { - 'file': 'ReplaceDialog', - 'kwds': {}, - 'msg': "Click the 'Replace' button.\n" - "Test various replace options in the 'Replace dialog'.\n" - "Click [Close] or [X] to close the 'Replace Dialog'." - } - -_search_dialog_spec = { - 'file': 'SearchDialog', - 'kwds': {}, - 'msg': "Click the 'Search' button.\n" - "Test various search options in the 'Search dialog'.\n" - "Click [Close] or [X] to close the 'Search Dialog'." 
- } - -_scrolled_list_spec = { - 'file': 'ScrolledList', - 'kwds': {}, - 'msg': "You should see a scrollable list of items\n" - "Selecting (clicking) or double clicking an item " - "prints the name to the console or Idle shell.\n" - "Right clicking an item will display a popup." - } - -show_idlehelp_spec = { - 'file': 'help', - 'kwds': {}, - 'msg': "If the help text displays, this works.\n" - "Text is selectable. Window is scrollable." - } - -_stack_viewer_spec = { - 'file': 'StackViewer', - 'kwds': {}, - 'msg': "A stacktrace for a NameError exception.\n" - "Expand 'idlelib ...' and ''.\n" - "Check that exc_value, exc_tb, and exc_type are correct.\n" - } - -_tabbed_pages_spec = { - 'file': 'tabbedpages', - 'kwds': {}, - 'msg': "Toggle between the two tabs 'foo' and 'bar'\n" - "Add a tab by entering a suitable name for it.\n" - "Remove an existing tab by entering its name.\n" - "Remove all existing tabs.\n" - " is an invalid add page and remove page name.\n" - } - -TextViewer_spec = { - 'file': 'textView', - 'kwds': {'title': 'Test textView', - 'text':'The quick brown fox jumps over the lazy dog.\n'*35, - '_htest': True}, - 'msg': "Test for read-only property of text.\n" - "Text is selectable. Window is scrollable.", - } - -_tooltip_spec = { - 'file': 'ToolTip', - 'kwds': {}, - 'msg': "Place mouse cursor over both the buttons\n" - "A tooltip should appear with some text." - } - -_tree_widget_spec = { - 'file': 'TreeWidget', - 'kwds': {}, - 'msg': "The canvas is scrollable.\n" - "Click on folders upto to the lowest level." - } - -_undo_delegator_spec = { - 'file': 'UndoDelegator', - 'kwds': {}, - 'msg': "Click [Undo] to undo any action.\n" - "Click [Redo] to redo any action.\n" - "Click [Dump] to dump the current state " - "by printing to the console or the IDLE shell.\n" - } - -_widget_redirector_spec = { - 'file': 'WidgetRedirector', - 'kwds': {}, - 'msg': "Every text insert should be printed to the console." - "or the IDLE shell." 
- } - -def run(*tests): - root = tk.Tk() - root.title('IDLE htest') - root.resizable(0, 0) - _initializeTkVariantTests(root) - - # a scrollable Label like constant width text widget. - frameLabel = tk.Frame(root, padx=10) - frameLabel.pack() - text = tk.Text(frameLabel, wrap='word') - text.configure(bg=root.cget('bg'), relief='flat', height=4, width=70) - scrollbar = tk.Scrollbar(frameLabel, command=text.yview) - text.config(yscrollcommand=scrollbar.set) - scrollbar.pack(side='right', fill='y', expand=False) - text.pack(side='left', fill='both', expand=True) - - test_list = [] # List of tuples of the form (spec, callable widget) - if tests: - for test in tests: - test_spec = globals()[test.__name__ + '_spec'] - test_spec['name'] = test.__name__ - test_list.append((test_spec, test)) - else: - for k, d in globals().items(): - if k.endswith('_spec'): - test_name = k[:-5] - test_spec = d - test_spec['name'] = test_name - mod = import_module('idlelib.' + test_spec['file']) - test = getattr(mod, test_name) - test_list.append((test_spec, test)) - - test_name = [tk.StringVar('')] - callable_object = [None] - test_kwds = [None] - - - def next(): - if len(test_list) == 1: - next_button.pack_forget() - test_spec, callable_object[0] = test_list.pop() - test_kwds[0] = test_spec['kwds'] - test_kwds[0]['parent'] = root - test_name[0].set('Test ' + test_spec['name']) - - text.configure(state='normal') # enable text editing - text.delete('1.0','end') - text.insert("1.0",test_spec['msg']) - text.configure(state='disabled') # preserve read-only property - - def run_test(): - widget = callable_object[0](**test_kwds[0]) - try: - print(widget.result) - except AttributeError: - pass - - button = tk.Button(root, textvariable=test_name[0], command=run_test) - button.pack() - next_button = tk.Button(root, text="Next", command=next) - next_button.pack() - - next() - - root.mainloop() - -if __name__ == '__main__': - run() diff --git a/python/Lib/idlelib/idle_test/mock_idle.py 
b/python/Lib/idlelib/idle_test/mock_idle.py deleted file mode 100644 index 7b09f836f0..0000000000 --- a/python/Lib/idlelib/idle_test/mock_idle.py +++ /dev/null @@ -1,55 +0,0 @@ -'''Mock classes that imitate idlelib modules or classes. - -Attributes and methods will be added as needed for tests. -''' - -from idlelib.idle_test.mock_tk import Text - -class Func(object): - '''Mock function captures args and returns result set by test. - - Attributes: - self.called - records call even if no args, kwds passed. - self.result - set by init, returned by call. - self.args - captures positional arguments. - self.kwds - captures keyword arguments. - - Most common use will probably be to mock methods. - Mock_tk.Var and Mbox_func are special variants of this. - ''' - def __init__(self, result=None): - self.called = False - self.result = result - self.args = None - self.kwds = None - def __call__(self, *args, **kwds): - self.called = True - self.args = args - self.kwds = kwds - if isinstance(self.result, BaseException): - raise self.result - else: - return self.result - - -class Editor(object): - '''Minimally imitate EditorWindow.EditorWindow class. - ''' - def __init__(self, flist=None, filename=None, key=None, root=None): - self.text = Text() - self.undo = UndoDelegator() - - def get_selection_indices(self): - first = self.text.index('1.0') - last = self.text.index('end') - return first, last - - -class UndoDelegator(object): - '''Minimally imitate UndoDelegator,UndoDelegator class. - ''' - # A real undo block is only needed for user interaction. - def undo_block_start(*args): - pass - def undo_block_stop(*args): - pass diff --git a/python/Lib/idlelib/idle_test/mock_tk.py b/python/Lib/idlelib/idle_test/mock_tk.py deleted file mode 100644 index f42a039711..0000000000 --- a/python/Lib/idlelib/idle_test/mock_tk.py +++ /dev/null @@ -1,298 +0,0 @@ -"""Classes that replace tkinter gui objects used by an object being tested. 
- -A gui object is anything with a master or parent parameter, which is -typically required in spite of what the doc strings say. -""" - -class Event(object): - '''Minimal mock with attributes for testing event handlers. - - This is not a gui object, but is used as an argument for callbacks - that access attributes of the event passed. If a callback ignores - the event, other than the fact that is happened, pass 'event'. - - Keyboard, mouse, window, and other sources generate Event instances. - Event instances have the following attributes: serial (number of - event), time (of event), type (of event as number), widget (in which - event occurred), and x,y (position of mouse). There are other - attributes for specific events, such as keycode for key events. - tkinter.Event.__doc__ has more but is still not complete. - ''' - def __init__(self, **kwds): - "Create event with attributes needed for test" - self.__dict__.update(kwds) - -class Var(object): - "Use for String/Int/BooleanVar: incomplete" - def __init__(self, master=None, value=None, name=None): - self.master = master - self.value = value - self.name = name - def set(self, value): - self.value = value - def get(self): - return self.value - -class Mbox_func(object): - """Generic mock for messagebox functions, which all have the same signature. - - Instead of displaying a message box, the mock's call method saves the - arguments as instance attributes, which test functions can then examime. - The test can set the result returned to ask function - """ - def __init__(self, result=None): - self.result = result # Return None for all show funcs - def __call__(self, title, message, *args, **kwds): - # Save all args for possible examination by tester - self.title = title - self.message = message - self.args = args - self.kwds = kwds - return self.result # Set by tester for ask functions - -class Mbox(object): - """Mock for tkinter.messagebox with an Mbox_func for each function. 
- - This module was 'tkMessageBox' in 2.x; hence the 'import as' in 3.x. - Example usage in test_module.py for testing functions in module.py: - --- -from idlelib.idle_test.mock_tk import Mbox -import module - -orig_mbox = module.tkMessageBox -showerror = Mbox.showerror # example, for attribute access in test methods - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - module.tkMessageBox = Mbox - - @classmethod - def tearDownClass(cls): - module.tkMessageBox = orig_mbox - --- - For 'ask' functions, set func.result return value before calling the method - that uses the message function. When tkMessageBox functions are the - only gui alls in a method, this replacement makes the method gui-free, - """ - askokcancel = Mbox_func() # True or False - askquestion = Mbox_func() # 'yes' or 'no' - askretrycancel = Mbox_func() # True or False - askyesno = Mbox_func() # True or False - askyesnocancel = Mbox_func() # True, False, or None - showerror = Mbox_func() # None - showinfo = Mbox_func() # None - showwarning = Mbox_func() # None - -from _tkinter import TclError - -class Text(object): - """A semi-functional non-gui replacement for tkinter.Text text editors. - - The mock's data model is that a text is a list of \n-terminated lines. - The mock adds an empty string at the beginning of the list so that the - index of actual lines start at 1, as with Tk. The methods never see this. - Tk initializes files with a terminal \n that cannot be deleted. It is - invisible in the sense that one cannot move the cursor beyond it. - - This class is only tested (and valid) with strings of ascii chars. - For testing, we are not concerned with Tk Text's treatment of, - for instance, 0-width characters or character + accent. - """ - def __init__(self, master=None, cnf={}, **kw): - '''Initialize mock, non-gui, text-only Text widget. - - At present, all args are ignored. Almost all affect visual behavior. - There are just a few Text-only options that affect text behavior. 
- ''' - self.data = ['', '\n'] - - def index(self, index): - "Return string version of index decoded according to current text." - return "%s.%s" % self._decode(index, endflag=1) - - def _decode(self, index, endflag=0): - """Return a (line, char) tuple of int indexes into self.data. - - This implements .index without converting the result back to a string. - The result is contrained by the number of lines and linelengths of - self.data. For many indexes, the result is initially (1, 0). - - The input index may have any of several possible forms: - * line.char float: converted to 'line.char' string; - * 'line.char' string, where line and char are decimal integers; - * 'line.char lineend', where lineend='lineend' (and char is ignored); - * 'line.end', where end='end' (same as above); - * 'insert', the positions before terminal \n; - * 'end', whose meaning depends on the endflag passed to ._endex. - * 'sel.first' or 'sel.last', where sel is a tag -- not implemented. - """ - if isinstance(index, (float, bytes)): - index = str(index) - try: - index=index.lower() - except AttributeError: - raise TclError('bad text index "%s"' % index) - - lastline = len(self.data) - 1 # same as number of text lines - if index == 'insert': - return lastline, len(self.data[lastline]) - 1 - elif index == 'end': - return self._endex(endflag) - - line, char = index.split('.') - line = int(line) - - # Out of bounds line becomes first or last ('end') index - if line < 1: - return 1, 0 - elif line > lastline: - return self._endex(endflag) - - linelength = len(self.data[line]) -1 # position before/at \n - if char.endswith(' lineend') or char == 'end': - return line, linelength - # Tk requires that ignored chars before ' lineend' be valid int - - # Out of bounds char becomes first or last index of line - char = int(char) - if char < 0: - char = 0 - elif char > linelength: - char = linelength - return line, char - - def _endex(self, endflag): - '''Return position for 'end' or line overflow 
corresponding to endflag. - - -1: position before terminal \n; for .insert(), .delete - 0: position after terminal \n; for .get, .delete index 1 - 1: same viewed as beginning of non-existent next line (for .index) - ''' - n = len(self.data) - if endflag == 1: - return n, 0 - else: - n -= 1 - return n, len(self.data[n]) + endflag - - - def insert(self, index, chars): - "Insert chars before the character at index." - - if not chars: # ''.splitlines() is [], not [''] - return - chars = chars.splitlines(True) - if chars[-1][-1] == '\n': - chars.append('') - line, char = self._decode(index, -1) - before = self.data[line][:char] - after = self.data[line][char:] - self.data[line] = before + chars[0] - self.data[line+1:line+1] = chars[1:] - self.data[line+len(chars)-1] += after - - - def get(self, index1, index2=None): - "Return slice from index1 to index2 (default is 'index1+1')." - - startline, startchar = self._decode(index1) - if index2 is None: - endline, endchar = startline, startchar+1 - else: - endline, endchar = self._decode(index2) - - if startline == endline: - return self.data[startline][startchar:endchar] - else: - lines = [self.data[startline][startchar:]] - for i in range(startline+1, endline): - lines.append(self.data[i]) - lines.append(self.data[endline][:endchar]) - return ''.join(lines) - - - def delete(self, index1, index2=None): - '''Delete slice from index1 to index2 (default is 'index1+1'). - - Adjust default index2 ('index+1) for line ends. - Do not delete the terminal \n at the very end of self.data ([-1][-1]). 
- ''' - startline, startchar = self._decode(index1, -1) - if index2 is None: - if startchar < len(self.data[startline])-1: - # not deleting \n - endline, endchar = startline, startchar+1 - elif startline < len(self.data) - 1: - # deleting non-terminal \n, convert 'index1+1 to start of next line - endline, endchar = startline+1, 0 - else: - # do not delete terminal \n if index1 == 'insert' - return - else: - endline, endchar = self._decode(index2, -1) - # restricting end position to insert position excludes terminal \n - - if startline == endline and startchar < endchar: - self.data[startline] = self.data[startline][:startchar] + \ - self.data[startline][endchar:] - elif startline < endline: - self.data[startline] = self.data[startline][:startchar] + \ - self.data[endline][endchar:] - startline += 1 - for i in range(startline, endline+1): - del self.data[startline] - - def compare(self, index1, op, index2): - line1, char1 = self._decode(index1) - line2, char2 = self._decode(index2) - if op == '<': - return line1 < line2 or line1 == line2 and char1 < char2 - elif op == '<=': - return line1 < line2 or line1 == line2 and char1 <= char2 - elif op == '>': - return line1 > line2 or line1 == line2 and char1 > char2 - elif op == '>=': - return line1 > line2 or line1 == line2 and char1 >= char2 - elif op == '==': - return line1 == line2 and char1 == char2 - elif op == '!=': - return line1 != line2 or char1 != char2 - else: - raise TclError('''bad comparison operator "%s":''' - '''must be <, <=, ==, >=, >, or !=''' % op) - - # The following Text methods normally do something and return None. - # Whether doing nothing is sufficient for a test will depend on the test. - - def mark_set(self, name, index): - "Set mark *name* before the character at index." - pass - - def mark_unset(self, *markNames): - "Delete all marks in markNames." - - def tag_remove(self, tagName, index1, index2=None): - "Remove tag tagName from all characters between index1 and index2." 
- pass - - # The following Text methods affect the graphics screen and return None. - # Doing nothing should always be sufficient for tests. - - def scan_dragto(self, x, y): - "Adjust the view of the text according to scan_mark" - - def scan_mark(self, x, y): - "Remember the current X, Y coordinates." - - def see(self, index): - "Scroll screen to make the character at INDEX is visible." - pass - - # The following is a Misc method inherited by Text. - # It should properly go in a Misc mock, but is included here for now. - - def bind(sequence=None, func=None, add=None): - "Bind to this widget at event sequence a call to function func." - pass diff --git a/python/Lib/idlelib/idle_test/test_autocomplete.py b/python/Lib/idlelib/idle_test/test_autocomplete.py deleted file mode 100644 index 002751efcc..0000000000 --- a/python/Lib/idlelib/idle_test/test_autocomplete.py +++ /dev/null @@ -1,140 +0,0 @@ -import unittest -from test.test_support import requires -from Tkinter import Tk, Text - -import idlelib.AutoComplete as ac -import idlelib.AutoCompleteWindow as acw -from idlelib.idle_test.mock_idle import Func -from idlelib.idle_test.mock_tk import Event - -class AutoCompleteWindow: - def complete(): - return - -class DummyEditwin: - def __init__(self, root, text): - self.root = root - self.text = text - self.indentwidth = 8 - self.tabwidth = 8 - self.context_use_ps1 = True - - -class AutoCompleteTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - cls.text = Text(cls.root) - cls.editor = DummyEditwin(cls.root, cls.text) - - @classmethod - def tearDownClass(cls): - del cls.editor, cls.text - cls.root.destroy() - del cls.root - - def setUp(self): - self.editor.text.delete('1.0', 'end') - self.autocomplete = ac.AutoComplete(self.editor) - - def test_init(self): - self.assertEqual(self.autocomplete.editwin, self.editor) - - def test_make_autocomplete_window(self): - testwin = self.autocomplete._make_autocomplete_window() - 
self.assertIsInstance(testwin, acw.AutoCompleteWindow) - - def test_remove_autocomplete_window(self): - self.autocomplete.autocompletewindow = ( - self.autocomplete._make_autocomplete_window()) - self.autocomplete._remove_autocomplete_window() - self.assertIsNone(self.autocomplete.autocompletewindow) - - def test_force_open_completions_event(self): - # Test that force_open_completions_event calls _open_completions - o_cs = Func() - self.autocomplete.open_completions = o_cs - self.autocomplete.force_open_completions_event('event') - self.assertEqual(o_cs.args, (True, False, True)) - - def test_try_open_completions_event(self): - Equal = self.assertEqual - autocomplete = self.autocomplete - trycompletions = self.autocomplete.try_open_completions_event - o_c_l = Func() - autocomplete._open_completions_later = o_c_l - - # _open_completions_later should not be called with no text in editor - trycompletions('event') - Equal(o_c_l.args, None) - - # _open_completions_later should be called with COMPLETE_ATTRIBUTES (1) - self.text.insert('1.0', 're.') - trycompletions('event') - Equal(o_c_l.args, (False, False, False, 1)) - - # _open_completions_later should be called with COMPLETE_FILES (2) - self.text.delete('1.0', 'end') - self.text.insert('1.0', '"./Lib/') - trycompletions('event') - Equal(o_c_l.args, (False, False, False, 2)) - - def test_autocomplete_event(self): - Equal = self.assertEqual - autocomplete = self.autocomplete - - # Test that the autocomplete event is ignored if user is pressing a - # modifier key in addition to the tab key - ev = Event(mc_state=True) - self.assertIsNone(autocomplete.autocomplete_event(ev)) - del ev.mc_state - - # If autocomplete window is open, complete() method is called - self.text.insert('1.0', 're.') - # This must call autocomplete._make_autocomplete_window() - Equal(self.autocomplete.autocomplete_event(ev), 'break') - - # If autocomplete window is not active or does not exist, - # open_completions is called. 
Return depends on its return. - autocomplete._remove_autocomplete_window() - o_cs = Func() # .result = None - autocomplete.open_completions = o_cs - Equal(self.autocomplete.autocomplete_event(ev), None) - Equal(o_cs.args, (False, True, True)) - o_cs.result = True - Equal(self.autocomplete.autocomplete_event(ev), 'break') - Equal(o_cs.args, (False, True, True)) - - def test_open_completions_later(self): - # Test that autocomplete._delayed_completion_id is set - pass - - def test_delayed_open_completions(self): - # Test that autocomplete._delayed_completion_id set to None and that - # open_completions only called if insertion index is the same as - # _delayed_completion_index - pass - - def test_open_completions(self): - # Test completions of files and attributes as well as non-completion - # of errors - pass - - def test_fetch_completions(self): - # Test that fetch_completions returns 2 lists: - # For attribute completion, a large list containing all variables, and - # a small list containing non-private variables. - # For file completion, a large list containing all files in the path, - # and a small list containing files that do not start with '.' 
- pass - - def test_get_entity(self): - # Test that a name is in the namespace of sys.modules and - # __main__.__dict__ - pass - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_autoexpand.py b/python/Lib/idlelib/idle_test/test_autoexpand.py deleted file mode 100644 index 6be4fbf861..0000000000 --- a/python/Lib/idlelib/idle_test/test_autoexpand.py +++ /dev/null @@ -1,141 +0,0 @@ -"""Unit tests for idlelib.AutoExpand""" -import unittest -from test.test_support import requires -from Tkinter import Text, Tk -#from idlelib.idle_test.mock_tk import Text -from idlelib.AutoExpand import AutoExpand - - -class Dummy_Editwin: - # AutoExpand.__init__ only needs .text - def __init__(self, text): - self.text = text - -class AutoExpandTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - if 'Tkinter' in str(Text): - requires('gui') - cls.tk = Tk() - cls.text = Text(cls.tk) - else: - cls.text = Text() - cls.auto_expand = AutoExpand(Dummy_Editwin(cls.text)) - - @classmethod - def tearDownClass(cls): - del cls.text, cls.auto_expand - if hasattr(cls, 'tk'): - cls.tk.destroy() - del cls.tk - - def tearDown(self): - self.text.delete('1.0', 'end') - - def test_get_prevword(self): - text = self.text - previous = self.auto_expand.getprevword - equal = self.assertEqual - - equal(previous(), '') - - text.insert('insert', 't') - equal(previous(), 't') - - text.insert('insert', 'his') - equal(previous(), 'this') - - text.insert('insert', ' ') - equal(previous(), '') - - text.insert('insert', 'is') - equal(previous(), 'is') - - text.insert('insert', '\nsample\nstring') - equal(previous(), 'string') - - text.delete('3.0', 'insert') - equal(previous(), '') - - text.delete('1.0', 'end') - equal(previous(), '') - - def test_before_only(self): - previous = self.auto_expand.getprevword - expand = self.auto_expand.expand_word_event - equal = self.assertEqual - - self.text.insert('insert', 'ab ac bx ad ab a') - 
equal(self.auto_expand.getwords(), ['ab', 'ad', 'ac', 'a']) - expand('event') - equal(previous(), 'ab') - expand('event') - equal(previous(), 'ad') - expand('event') - equal(previous(), 'ac') - expand('event') - equal(previous(), 'a') - - def test_after_only(self): - # Also add punctuation 'noise' that shoud be ignored. - text = self.text - previous = self.auto_expand.getprevword - expand = self.auto_expand.expand_word_event - equal = self.assertEqual - - text.insert('insert', 'a, [ab] ac: () bx"" cd ac= ad ya') - text.mark_set('insert', '1.1') - equal(self.auto_expand.getwords(), ['ab', 'ac', 'ad', 'a']) - expand('event') - equal(previous(), 'ab') - expand('event') - equal(previous(), 'ac') - expand('event') - equal(previous(), 'ad') - expand('event') - equal(previous(), 'a') - - def test_both_before_after(self): - text = self.text - previous = self.auto_expand.getprevword - expand = self.auto_expand.expand_word_event - equal = self.assertEqual - - text.insert('insert', 'ab xy yz\n') - text.insert('insert', 'a ac by ac') - - text.mark_set('insert', '2.1') - equal(self.auto_expand.getwords(), ['ab', 'ac', 'a']) - expand('event') - equal(previous(), 'ab') - expand('event') - equal(previous(), 'ac') - expand('event') - equal(previous(), 'a') - - def test_other_expand_cases(self): - text = self.text - expand = self.auto_expand.expand_word_event - equal = self.assertEqual - - # no expansion candidate found - equal(self.auto_expand.getwords(), []) - equal(expand('event'), 'break') - - text.insert('insert', 'bx cy dz a') - equal(self.auto_expand.getwords(), []) - - # reset state by successfully expanding once - # move cursor to another position and expand again - text.insert('insert', 'ac xy a ac ad a') - text.mark_set('insert', '1.7') - expand('event') - initial_state = self.auto_expand.state - text.mark_set('insert', '1.end') - expand('event') - new_state = self.auto_expand.state - self.assertNotEqual(initial_state, new_state) - -if __name__ == '__main__': - 
unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_calltips.py b/python/Lib/idlelib/idle_test/test_calltips.py deleted file mode 100644 index 147119ce37..0000000000 --- a/python/Lib/idlelib/idle_test/test_calltips.py +++ /dev/null @@ -1,185 +0,0 @@ -import unittest -import idlelib.CallTips as ct -CTi = ct.CallTips() # needed for get_entity test in 2.7 -import textwrap -import types -import warnings - -default_tip = '' - -# Test Class TC is used in multiple get_argspec test methods -class TC(object): - 'doc' - tip = "(ai=None, *args)" - def __init__(self, ai=None, *b): 'doc' - __init__.tip = "(self, ai=None, *args)" - def t1(self): 'doc' - t1.tip = "(self)" - def t2(self, ai, b=None): 'doc' - t2.tip = "(self, ai, b=None)" - def t3(self, ai, *args): 'doc' - t3.tip = "(self, ai, *args)" - def t4(self, *args): 'doc' - t4.tip = "(self, *args)" - def t5(self, ai, b=None, *args, **kw): 'doc' - t5.tip = "(self, ai, b=None, *args, **kwargs)" - def t6(no, self): 'doc' - t6.tip = "(no, self)" - def __call__(self, ci): 'doc' - __call__.tip = "(self, ci)" - # attaching .tip to wrapped methods does not work - @classmethod - def cm(cls, a): 'doc' - @staticmethod - def sm(b): 'doc' - -tc = TC() - -signature = ct.get_arg_text # 2.7 and 3.x use different functions -class Get_signatureTest(unittest.TestCase): - # The signature function must return a string, even if blank. - # Test a variety of objects to be sure that none cause it to raise - # (quite aside from getting as correct an answer as possible). - # The tests of builtins may break if the docstrings change, - # but a red buildbot is better than a user crash (as has happened). - # For a simple mismatch, change the expected output to the actual. 
- - def test_builtins(self): - # 2.7 puts '()\n' where 3.x does not, other minor differences - - # Python class that inherits builtin methods - class List(list): "List() doc" - # Simulate builtin with no docstring for default argspec test - class SB: __call__ = None - - def gtest(obj, out): - self.assertEqual(signature(obj), out) - - if List.__doc__ is not None: - gtest(List, '()\n' + List.__doc__) - gtest(list.__new__, - 'T.__new__(S, ...) -> a new object with type S, a subtype of T') - gtest(list.__init__, - 'x.__init__(...) initializes x; see help(type(x)) for signature') - append_doc = "L.append(object) -- append object to end" - gtest(list.append, append_doc) - gtest([].append, append_doc) - gtest(List.append, append_doc) - - gtest(types.MethodType, '()\ninstancemethod(function, instance, class)') - gtest(SB(), default_tip) - - def test_signature_wrap(self): - # This is also a test of an old-style class - if textwrap.TextWrapper.__doc__ is not None: - self.assertEqual(signature(textwrap.TextWrapper), '''\ -(width=70, initial_indent='', subsequent_indent='', expand_tabs=True, - replace_whitespace=True, fix_sentence_endings=False, break_long_words=True, - drop_whitespace=True, break_on_hyphens=True)''') - - def test_docline_truncation(self): - def f(): pass - f.__doc__ = 'a'*300 - self.assertEqual(signature(f), '()\n' + 'a' * (ct._MAX_COLS-3) + '...') - - def test_multiline_docstring(self): - # Test fewer lines than max. - self.assertEqual(signature(list), - "()\nlist() -> new empty list\n" - "list(iterable) -> new list initialized from iterable's items") - - # Test max lines and line (currently) too long. 
- def f(): - pass - s = 'a\nb\nc\nd\n' - f.__doc__ = s + 300 * 'e' + 'f' - self.assertEqual(signature(f), - '()\n' + s + (ct._MAX_COLS - 3) * 'e' + '...') - - def test_functions(self): - def t1(): 'doc' - t1.tip = "()" - def t2(a, b=None): 'doc' - t2.tip = "(a, b=None)" - def t3(a, *args): 'doc' - t3.tip = "(a, *args)" - def t4(*args): 'doc' - t4.tip = "(*args)" - def t5(a, b=None, *args, **kwds): 'doc' - t5.tip = "(a, b=None, *args, **kwargs)" - - doc = '\ndoc' if t1.__doc__ is not None else '' - for func in (t1, t2, t3, t4, t5, TC): - self.assertEqual(signature(func), func.tip + doc) - - def test_methods(self): - doc = '\ndoc' if TC.__doc__ is not None else '' - for meth in (TC.t1, TC.t2, TC.t3, TC.t4, TC.t5, TC.t6, TC.__call__): - self.assertEqual(signature(meth), meth.tip + doc) - self.assertEqual(signature(TC.cm), "(a)" + doc) - self.assertEqual(signature(TC.sm), "(b)" + doc) - - def test_bound_methods(self): - # test that first parameter is correctly removed from argspec - doc = '\ndoc' if TC.__doc__ is not None else '' - for meth, mtip in ((tc.t1, "()"), (tc.t4, "(*args)"), (tc.t6, "(self)"), - (tc.__call__, '(ci)'), (tc, '(ci)'), (TC.cm, "(a)"),): - self.assertEqual(signature(meth), mtip + doc) - - def test_starred_parameter(self): - # test that starred first parameter is *not* removed from argspec - class C: - def m1(*args): pass - def m2(**kwds): pass - def f1(args, kwargs, *a, **k): pass - def f2(args, kwargs, args1, kwargs1, *a, **k): pass - c = C() - self.assertEqual(signature(C.m1), '(*args)') - self.assertEqual(signature(c.m1), '(*args)') - self.assertEqual(signature(C.m2), '(**kwargs)') - self.assertEqual(signature(c.m2), '(**kwargs)') - self.assertEqual(signature(f1), '(args, kwargs, *args1, **kwargs1)') - self.assertEqual(signature(f2), - '(args, kwargs, args1, kwargs1, *args2, **kwargs2)') - - def test_no_docstring(self): - def nd(s): pass - TC.nd = nd - self.assertEqual(signature(nd), "(s)") - self.assertEqual(signature(TC.nd), "(s)") - 
self.assertEqual(signature(tc.nd), "()") - - def test_attribute_exception(self): - class NoCall(object): - def __getattr__(self, name): - raise BaseException - class Call(NoCall): - def __call__(self, ci): - pass - for meth, mtip in ((NoCall, '()'), (Call, '()'), - (NoCall(), ''), (Call(), '(ci)')): - self.assertEqual(signature(meth), mtip) - - def test_non_callables(self): - for obj in (0, 0.0, '0', b'0', [], {}): - self.assertEqual(signature(obj), '') - -class Get_entityTest(unittest.TestCase): - # In 3.x, get_entity changed from 'instance method' to module function - # since 'self' not used. Use dummy instance until change 2.7 also. - def test_bad_entity(self): - self.assertIsNone(CTi.get_entity('1//0')) - def test_good_entity(self): - self.assertIs(CTi.get_entity('int'), int) - -class Py2Test(unittest.TestCase): - def test_paramtuple_float(self): - # 18539: (a,b) becomes '.0' in code object; change that but not 0.0 - with warnings.catch_warnings(): - # Suppess message of py3 deprecation of parameter unpacking - warnings.simplefilter("ignore") - exec "def f((a,b), c=0.0): pass" - self.assertEqual(signature(f), '(, c=0.0)') - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=False) diff --git a/python/Lib/idlelib/idle_test/test_config_name.py b/python/Lib/idlelib/idle_test/test_config_name.py deleted file mode 100644 index 4403f87fd3..0000000000 --- a/python/Lib/idlelib/idle_test/test_config_name.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Unit tests for idlelib.configSectionNameDialog""" -import unittest -from idlelib.idle_test.mock_tk import Var, Mbox -from idlelib import configSectionNameDialog as name_dialog_module - -name_dialog = name_dialog_module.GetCfgSectionNameDialog - -class Dummy_name_dialog(object): - # Mock for testing the following methods of name_dialog - name_ok = name_dialog.name_ok.im_func - Ok = name_dialog.Ok.im_func - Cancel = name_dialog.Cancel.im_func - # Attributes, constant or variable, needed for tests - used_names = ['used'] - 
name = Var() - result = None - destroyed = False - def destroy(self): - self.destroyed = True - -# name_ok calls Mbox.showerror if name is not ok -orig_mbox = name_dialog_module.tkMessageBox -showerror = Mbox.showerror - -class ConfigNameTest(unittest.TestCase): - dialog = Dummy_name_dialog() - - @classmethod - def setUpClass(cls): - name_dialog_module.tkMessageBox = Mbox - - @classmethod - def tearDownClass(cls): - name_dialog_module.tkMessageBox = orig_mbox - - def test_blank_name(self): - self.dialog.name.set(' ') - self.assertEqual(self.dialog.name_ok(), '') - self.assertEqual(showerror.title, 'Name Error') - self.assertIn('No', showerror.message) - - def test_used_name(self): - self.dialog.name.set('used') - self.assertEqual(self.dialog.name_ok(), '') - self.assertEqual(showerror.title, 'Name Error') - self.assertIn('use', showerror.message) - - def test_long_name(self): - self.dialog.name.set('good'*8) - self.assertEqual(self.dialog.name_ok(), '') - self.assertEqual(showerror.title, 'Name Error') - self.assertIn('too long', showerror.message) - - def test_good_name(self): - self.dialog.name.set(' good ') - showerror.title = 'No Error' # should not be called - self.assertEqual(self.dialog.name_ok(), 'good') - self.assertEqual(showerror.title, 'No Error') - - def test_ok(self): - self.dialog.destroyed = False - self.dialog.name.set('good') - self.dialog.Ok() - self.assertEqual(self.dialog.result, 'good') - self.assertTrue(self.dialog.destroyed) - - def test_cancel(self): - self.dialog.destroyed = False - self.dialog.Cancel() - self.assertEqual(self.dialog.result, '') - self.assertTrue(self.dialog.destroyed) - - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=False) diff --git a/python/Lib/idlelib/idle_test/test_configdialog.py b/python/Lib/idlelib/idle_test/test_configdialog.py deleted file mode 100644 index ba651005f0..0000000000 --- a/python/Lib/idlelib/idle_test/test_configdialog.py +++ /dev/null @@ -1,33 +0,0 @@ -'''Unittests for 
idlelib/configHandler.py - -Coverage: 46% just by creating dialog. The other half is change code. - -''' -import unittest -from test.test_support import requires -from Tkinter import Tk -from idlelib.configDialog import ConfigDialog -from idlelib.macosxSupport import _initializeTkVariantTests - - -class ConfigDialogTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - cls.root.withdraw() - _initializeTkVariantTests(cls.root) - - @classmethod - def tearDownClass(cls): - cls.root.destroy() - del cls.root - - def test_dialog(self): - d = ConfigDialog(self.root, 'Test', _utest=True) - d.remove_var_callbacks() - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_delegator.py b/python/Lib/idlelib/idle_test/test_delegator.py deleted file mode 100644 index b8ae5eeefe..0000000000 --- a/python/Lib/idlelib/idle_test/test_delegator.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from idlelib.Delegator import Delegator - -class DelegatorTest(unittest.TestCase): - - def test_mydel(self): - # test a simple use scenario - - # initialize - mydel = Delegator(int) - self.assertIs(mydel.delegate, int) - self.assertEqual(mydel._Delegator__cache, set()) - - # add an attribute: - self.assertRaises(AttributeError, mydel.__getattr__, 'xyz') - bl = mydel.bit_length - self.assertIs(bl, int.bit_length) - self.assertIs(mydel.__dict__['bit_length'], int.bit_length) - self.assertEqual(mydel._Delegator__cache, {'bit_length'}) - - # add a second attribute - mydel.numerator - self.assertEqual(mydel._Delegator__cache, {'bit_length', 'numerator'}) - - # delete the second (which, however, leaves it in the name cache) - del mydel.numerator - self.assertNotIn('numerator', mydel.__dict__) - self.assertIn('numerator', mydel._Delegator__cache) - - # reset by calling .setdelegate, which calls .resetcache - mydel.setdelegate(float) - self.assertIs(mydel.delegate, float) - self.assertNotIn('bit_length', 
mydel.__dict__) - self.assertEqual(mydel._Delegator__cache, set()) - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=2) diff --git a/python/Lib/idlelib/idle_test/test_editmenu.py b/python/Lib/idlelib/idle_test/test_editmenu.py deleted file mode 100644 index 51d5c1638f..0000000000 --- a/python/Lib/idlelib/idle_test/test_editmenu.py +++ /dev/null @@ -1,101 +0,0 @@ -'''Test (selected) IDLE Edit menu items. - -Edit modules have their own test files files -''' -from test.test_support import requires -import Tkinter as tk -import unittest -from idlelib import PyShell - - -class PasteTest(unittest.TestCase): - '''Test pasting into widgets that allow pasting. - - On X11, replacing selections requires tk fix. - ''' - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = root = tk.Tk() - root.withdraw() - PyShell.fix_x11_paste(root) - cls.text = tk.Text(root) - cls.entry = tk.Entry(root) - cls.spin = tk.Spinbox(root) - root.clipboard_clear() - root.clipboard_append('two') - - @classmethod - def tearDownClass(cls): - del cls.text, cls.entry, cls.spin - cls.root.clipboard_clear() - cls.root.update_idletasks() - cls.root.update() - cls.root.destroy() - del cls.root - - def test_paste_text_no_selection(self): - "Test pasting into text without a selection." - text = self.text - tag, ans = '', 'onetwo\n' - text.delete('1.0', 'end') - text.insert('1.0', 'one', tag) - text.event_generate('<>') - self.assertEqual(text.get('1.0', 'end'), ans) - - def test_paste_text_selection(self): - "Test pasting into text with a selection." - text = self.text - tag, ans = 'sel', 'two\n' - text.delete('1.0', 'end') - text.insert('1.0', 'one', tag) - text.event_generate('<>') - self.assertEqual(text.get('1.0', 'end'), ans) - - def test_paste_entry_no_selection(self): - "Test pasting into an entry without a selection." - # On 3.6, generated <> fails without empty select range - # for 'no selection'. Live widget works fine. 
- entry = self.entry - end, ans = 0, 'onetwo' - entry.delete(0, 'end') - entry.insert(0, 'one') - entry.select_range(0, end) # see note - entry.event_generate('<>') - self.assertEqual(entry.get(), ans) - - def test_paste_entry_selection(self): - "Test pasting into an entry with a selection." - entry = self.entry - end, ans = 'end', 'two' - entry.delete(0, 'end') - entry.insert(0, 'one') - entry.select_range(0, end) - entry.event_generate('<>') - self.assertEqual(entry.get(), ans) - - def test_paste_spin_no_selection(self): - "Test pasting into a spinbox without a selection." - # See note above for entry. - spin = self.spin - end, ans = 0, 'onetwo' - spin.delete(0, 'end') - spin.insert(0, 'one') - spin.selection('range', 0, end) # see note - spin.event_generate('<>') - self.assertEqual(spin.get(), ans) - - def test_paste_spin_selection(self): - "Test pasting into a spinbox with a selection." - spin = self.spin - end, ans = 'end', 'two' - spin.delete(0, 'end') - spin.insert(0, 'one') - spin.selection('range', 0, end) - spin.event_generate('<>') - self.assertEqual(spin.get(), ans) - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_formatparagraph.py b/python/Lib/idlelib/idle_test/test_formatparagraph.py deleted file mode 100644 index 068ae381c3..0000000000 --- a/python/Lib/idlelib/idle_test/test_formatparagraph.py +++ /dev/null @@ -1,376 +0,0 @@ -# Test the functions and main class method of FormatParagraph.py -import unittest -from idlelib import FormatParagraph as fp -from idlelib.EditorWindow import EditorWindow -from Tkinter import Tk, Text -from test.test_support import requires - - -class Is_Get_Test(unittest.TestCase): - """Test the is_ and get_ functions""" - test_comment = '# This is a comment' - test_nocomment = 'This is not a comment' - trailingws_comment = '# This is a comment ' - leadingws_comment = ' # This is a comment' - leadingws_nocomment = ' This is not a comment' - - def 
test_is_all_white(self): - self.assertTrue(fp.is_all_white('')) - self.assertTrue(fp.is_all_white('\t\n\r\f\v')) - self.assertFalse(fp.is_all_white(self.test_comment)) - - def test_get_indent(self): - Equal = self.assertEqual - Equal(fp.get_indent(self.test_comment), '') - Equal(fp.get_indent(self.trailingws_comment), '') - Equal(fp.get_indent(self.leadingws_comment), ' ') - Equal(fp.get_indent(self.leadingws_nocomment), ' ') - - def test_get_comment_header(self): - Equal = self.assertEqual - # Test comment strings - Equal(fp.get_comment_header(self.test_comment), '#') - Equal(fp.get_comment_header(self.trailingws_comment), '#') - Equal(fp.get_comment_header(self.leadingws_comment), ' #') - # Test non-comment strings - Equal(fp.get_comment_header(self.leadingws_nocomment), ' ') - Equal(fp.get_comment_header(self.test_nocomment), '') - - -class FindTest(unittest.TestCase): - """Test the find_paragraph function in FormatParagraph. - - Using the runcase() function, find_paragraph() is called with 'mark' set at - multiple indexes before and inside the test paragraph. - - It appears that code with the same indentation as a quoted string is grouped - as part of the same paragraph, which is probably incorrect behavior. 
- """ - - @classmethod - def setUpClass(cls): - from idlelib.idle_test.mock_tk import Text - cls.text = Text() - - def runcase(self, inserttext, stopline, expected): - # Check that find_paragraph returns the expected paragraph when - # the mark index is set to beginning, middle, end of each line - # up to but not including the stop line - text = self.text - text.insert('1.0', inserttext) - for line in range(1, stopline): - linelength = int(text.index("%d.end" % line).split('.')[1]) - for col in (0, linelength//2, linelength): - tempindex = "%d.%d" % (line, col) - self.assertEqual(fp.find_paragraph(text, tempindex), expected) - text.delete('1.0', 'end') - - def test_find_comment(self): - comment = ( - "# Comment block with no blank lines before\n" - "# Comment line\n" - "\n") - self.runcase(comment, 3, ('1.0', '3.0', '#', comment[0:58])) - - comment = ( - "\n" - "# Comment block with whitespace line before and after\n" - "# Comment line\n" - "\n") - self.runcase(comment, 4, ('2.0', '4.0', '#', comment[1:70])) - - comment = ( - "\n" - " # Indented comment block with whitespace before and after\n" - " # Comment line\n" - "\n") - self.runcase(comment, 4, ('2.0', '4.0', ' #', comment[1:82])) - - comment = ( - "\n" - "# Single line comment\n" - "\n") - self.runcase(comment, 3, ('2.0', '3.0', '#', comment[1:23])) - - comment = ( - "\n" - " # Single line comment with leading whitespace\n" - "\n") - self.runcase(comment, 3, ('2.0', '3.0', ' #', comment[1:51])) - - comment = ( - "\n" - "# Comment immediately followed by code\n" - "x = 42\n" - "\n") - self.runcase(comment, 3, ('2.0', '3.0', '#', comment[1:40])) - - comment = ( - "\n" - " # Indented comment immediately followed by code\n" - "x = 42\n" - "\n") - self.runcase(comment, 3, ('2.0', '3.0', ' #', comment[1:53])) - - comment = ( - "\n" - "# Comment immediately followed by indented code\n" - " x = 42\n" - "\n") - self.runcase(comment, 3, ('2.0', '3.0', '#', comment[1:49])) - - def test_find_paragraph(self): - 
teststring = ( - '"""String with no blank lines before\n' - 'String line\n' - '"""\n' - '\n') - self.runcase(teststring, 4, ('1.0', '4.0', '', teststring[0:53])) - - teststring = ( - "\n" - '"""String with whitespace line before and after\n' - 'String line.\n' - '"""\n' - '\n') - self.runcase(teststring, 5, ('2.0', '5.0', '', teststring[1:66])) - - teststring = ( - '\n' - ' """Indented string with whitespace before and after\n' - ' Comment string.\n' - ' """\n' - '\n') - self.runcase(teststring, 5, ('2.0', '5.0', ' ', teststring[1:85])) - - teststring = ( - '\n' - '"""Single line string."""\n' - '\n') - self.runcase(teststring, 3, ('2.0', '3.0', '', teststring[1:27])) - - teststring = ( - '\n' - ' """Single line string with leading whitespace."""\n' - '\n') - self.runcase(teststring, 3, ('2.0', '3.0', ' ', teststring[1:55])) - - -class ReformatFunctionTest(unittest.TestCase): - """Test the reformat_paragraph function without the editor window.""" - - def test_reformat_paragraph(self): - Equal = self.assertEqual - reform = fp.reformat_paragraph - hw = "O hello world" - Equal(reform(' ', 1), ' ') - Equal(reform("Hello world", 20), "Hello world") - - # Test without leading newline - Equal(reform(hw, 1), "O\nhello\nworld") - Equal(reform(hw, 6), "O\nhello\nworld") - Equal(reform(hw, 7), "O hello\nworld") - Equal(reform(hw, 12), "O hello\nworld") - Equal(reform(hw, 13), "O hello world") - - # Test with leading newline - hw = "\nO hello world" - Equal(reform(hw, 1), "\nO\nhello\nworld") - Equal(reform(hw, 6), "\nO\nhello\nworld") - Equal(reform(hw, 7), "\nO hello\nworld") - Equal(reform(hw, 12), "\nO hello\nworld") - Equal(reform(hw, 13), "\nO hello world") - - -class ReformatCommentTest(unittest.TestCase): - """Test the reformat_comment function without the editor window.""" - - def test_reformat_comment(self): - Equal = self.assertEqual - - # reformat_comment formats to a minimum of 20 characters - test_string = ( - " \"\"\"this is a test of a reformat for a triple 
quoted string" - " will it reformat to less than 70 characters for me?\"\"\"") - result = fp.reformat_comment(test_string, 70, " ") - expected = ( - " \"\"\"this is a test of a reformat for a triple quoted string will it\n" - " reformat to less than 70 characters for me?\"\"\"") - Equal(result, expected) - - test_comment = ( - "# this is a test of a reformat for a triple quoted string will " - "it reformat to less than 70 characters for me?") - result = fp.reformat_comment(test_comment, 70, "#") - expected = ( - "# this is a test of a reformat for a triple quoted string will it\n" - "# reformat to less than 70 characters for me?") - Equal(result, expected) - - -class FormatClassTest(unittest.TestCase): - def test_init_close(self): - instance = fp.FormatParagraph('editor') - self.assertEqual(instance.editwin, 'editor') - instance.close() - self.assertEqual(instance.editwin, None) - - -# For testing format_paragraph_event, Initialize FormatParagraph with -# a mock Editor with .text and .get_selection_indices. The text must -# be a Text wrapper that adds two methods - -# A real EditorWindow creates unneeded, time-consuming baggage and -# sometimes emits shutdown warnings like this: -# "warning: callback failed in WindowList -# : invalid command name ".55131368.windows". -# Calling EditorWindow._close in tearDownClass prevents this but causes -# other problems (windows left open). - -class TextWrapper: - def __init__(self, master): - self.text = Text(master=master) - def __getattr__(self, name): - return getattr(self.text, name) - def undo_block_start(self): pass - def undo_block_stop(self): pass - -class Editor: - def __init__(self, root): - self.text = TextWrapper(root) - get_selection_indices = EditorWindow. get_selection_indices.im_func - -class FormatEventTest(unittest.TestCase): - """Test the formatting of text inside a Text widget. - - This is done with FormatParagraph.format.paragraph_event, - which calls functions in the module as appropriate. 
- """ - test_string = ( - " '''this is a test of a reformat for a triple " - "quoted string will it reformat to less than 70 " - "characters for me?'''\n") - multiline_test_string = ( - " '''The first line is under the max width.\n" - " The second line's length is way over the max width. It goes " - "on and on until it is over 100 characters long.\n" - " Same thing with the third line. It is also way over the max " - "width, but FormatParagraph will fix it.\n" - " '''\n") - multiline_test_comment = ( - "# The first line is under the max width.\n" - "# The second line's length is way over the max width. It goes on " - "and on until it is over 100 characters long.\n" - "# Same thing with the third line. It is also way over the max " - "width, but FormatParagraph will fix it.\n" - "# The fourth line is short like the first line.") - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - editor = Editor(root=cls.root) - cls.text = editor.text.text # Test code does not need the wrapper. - cls.formatter = fp.FormatParagraph(editor).format_paragraph_event - # Sets the insert mark just after the re-wrapped and inserted text. - - @classmethod - def tearDownClass(cls): - del cls.text, cls.formatter - cls.root.destroy() - del cls.root - - def test_short_line(self): - self.text.insert('1.0', "Short line\n") - self.formatter("Dummy") - self.assertEqual(self.text.get('1.0', 'insert'), "Short line\n" ) - self.text.delete('1.0', 'end') - - def test_long_line(self): - text = self.text - - # Set cursor ('insert' mark) to '1.0', within text. 
- text.insert('1.0', self.test_string) - text.mark_set('insert', '1.0') - self.formatter('ParameterDoesNothing', limit=70) - result = text.get('1.0', 'insert') - # find function includes \n - expected = ( -" '''this is a test of a reformat for a triple quoted string will it\n" -" reformat to less than 70 characters for me?'''\n") # yes - self.assertEqual(result, expected) - text.delete('1.0', 'end') - - # Select from 1.11 to line end. - text.insert('1.0', self.test_string) - text.tag_add('sel', '1.11', '1.end') - self.formatter('ParameterDoesNothing', limit=70) - result = text.get('1.0', 'insert') - # selection excludes \n - expected = ( -" '''this is a test of a reformat for a triple quoted string will it reformat\n" -" to less than 70 characters for me?'''") # no - self.assertEqual(result, expected) - text.delete('1.0', 'end') - - def test_multiple_lines(self): - text = self.text - # Select 2 long lines. - text.insert('1.0', self.multiline_test_string) - text.tag_add('sel', '2.0', '4.0') - self.formatter('ParameterDoesNothing', limit=70) - result = text.get('2.0', 'insert') - expected = ( -" The second line's length is way over the max width. It goes on and\n" -" on until it is over 100 characters long. Same thing with the third\n" -" line. It is also way over the max width, but FormatParagraph will\n" -" fix it.\n") - self.assertEqual(result, expected) - text.delete('1.0', 'end') - - def test_comment_block(self): - text = self.text - - # Set cursor ('insert') to '1.0', within block. - text.insert('1.0', self.multiline_test_comment) - self.formatter('ParameterDoesNothing', limit=70) - result = text.get('1.0', 'insert') - expected = ( -"# The first line is under the max width. The second line's length is\n" -"# way over the max width. It goes on and on until it is over 100\n" -"# characters long. Same thing with the third line. It is also way over\n" -"# the max width, but FormatParagraph will fix it. 
The fourth line is\n" -"# short like the first line.\n") - self.assertEqual(result, expected) - text.delete('1.0', 'end') - - # Select line 2, verify line 1 unaffected. - text.insert('1.0', self.multiline_test_comment) - text.tag_add('sel', '2.0', '3.0') - self.formatter('ParameterDoesNothing', limit=70) - result = text.get('1.0', 'insert') - expected = ( -"# The first line is under the max width.\n" -"# The second line's length is way over the max width. It goes on and\n" -"# on until it is over 100 characters long.\n") - self.assertEqual(result, expected) - text.delete('1.0', 'end') - -# The following block worked with EditorWindow but fails with the mock. -# Lines 2 and 3 get pasted together even though the previous block left -# the previous line alone. More investigation is needed. -## # Select lines 3 and 4 -## text.insert('1.0', self.multiline_test_comment) -## text.tag_add('sel', '3.0', '5.0') -## self.formatter('ParameterDoesNothing') -## result = text.get('3.0', 'insert') -## expected = ( -##"# Same thing with the third line. It is also way over the max width,\n" -##"# but FormatParagraph will fix it. The fourth line is short like the\n" -##"# first line.\n") -## self.assertEqual(result, expected) -## text.delete('1.0', 'end') - - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=2) diff --git a/python/Lib/idlelib/idle_test/test_grep.py b/python/Lib/idlelib/idle_test/test_grep.py deleted file mode 100644 index e9f4f22ae6..0000000000 --- a/python/Lib/idlelib/idle_test/test_grep.py +++ /dev/null @@ -1,82 +0,0 @@ -""" !Changing this line will break Test_findfile.test_found! -Non-gui unit tests for idlelib.GrepDialog methods. -dummy_command calls grep_it calls findfiles. -An exception raised in one method will fail callers. -Otherwise, tests are mostly independent. -*** Currently only test grep_it. 
-""" -import unittest -from test.test_support import captured_stdout, findfile -from idlelib.idle_test.mock_tk import Var -from idlelib.GrepDialog import GrepDialog -import re - -__file__ = findfile('idlelib/idle_test') + '/test_grep.py' - -class Dummy_searchengine: - '''GrepDialog.__init__ calls parent SearchDiabolBase which attaches the - passed in SearchEngine instance as attribute 'engine'. Only a few of the - many possible self.engine.x attributes are needed here. - ''' - def getpat(self): - return self._pat - -searchengine = Dummy_searchengine() - -class Dummy_grep: - # Methods tested - #default_command = GrepDialog.default_command - grep_it = GrepDialog.grep_it.im_func - findfiles = GrepDialog.findfiles.im_func - # Other stuff needed - recvar = Var(False) - engine = searchengine - def close(self): # gui method - pass - -grep = Dummy_grep() - -class FindfilesTest(unittest.TestCase): - # findfiles is really a function, not a method, could be iterator - # test that filename return filename - # test that idlelib has many .py files - # test that recursive flag adds idle_test .py files - pass - -class Grep_itTest(unittest.TestCase): - # Test captured reports with 0 and some hits. - # Should test file names, but Windows reports have mixed / and \ separators - # from incomplete replacement, so 'later'. - - def report(self, pat): - grep.engine._pat = pat - with captured_stdout() as s: - grep.grep_it(re.compile(pat), __file__) - lines = s.getvalue().split('\n') - lines.pop() # remove bogus '' after last \n - return lines - - def test_unfound(self): - pat = 'xyz*'*7 - lines = self.report(pat) - self.assertEqual(len(lines), 2) - self.assertIn(pat, lines[0]) - self.assertEqual(lines[1], 'No hits.') - - def test_found(self): - - pat = '""" !Changing this line will break Test_findfile.test_found!' 
- lines = self.report(pat) - self.assertEqual(len(lines), 5) - self.assertIn(pat, lines[0]) - self.assertIn('py: 1:', lines[1]) # line number 1 - self.assertIn('2', lines[3]) # hits found 2 - self.assertTrue(lines[4].startswith('(Hint:')) - -class Default_commandTest(unittest.TestCase): - # To write this, mode OutputWindow import to top of GrepDialog - # so it can be replaced by captured_stdout in class setup/teardown. - pass - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=False) diff --git a/python/Lib/idlelib/idle_test/test_helpabout.py b/python/Lib/idlelib/idle_test/test_helpabout.py deleted file mode 100644 index 0046f87762..0000000000 --- a/python/Lib/idlelib/idle_test/test_helpabout.py +++ /dev/null @@ -1,52 +0,0 @@ -'''Test idlelib.help_about. - -Coverage: -''' -from idlelib import aboutDialog as help_about -from idlelib import textView as textview -from idlelib.idle_test.mock_idle import Func -from idlelib.idle_test.mock_tk import Mbox -import unittest - -About = help_about.AboutDialog -class Dummy_about_dialog(): - # Dummy class for testing file display functions. - idle_credits = About.ShowIDLECredits.im_func - idle_readme = About.ShowIDLEAbout.im_func - idle_news = About.ShowIDLENEWS.im_func - # Called by the above - display_file_text = About.display_file_text.im_func - - -class DisplayFileTest(unittest.TestCase): - "Test that .txt files are found and properly decoded." 
- dialog = Dummy_about_dialog() - - @classmethod - def setUpClass(cls): - cls.orig_mbox = textview.tkMessageBox - cls.orig_view = textview.view_text - cls.mbox = Mbox() - cls.view = Func() - textview.tkMessageBox = cls.mbox - textview.view_text = cls.view - cls.About = Dummy_about_dialog() - - @classmethod - def tearDownClass(cls): - textview.tkMessageBox = cls.orig_mbox - textview.view_text = cls.orig_view.im_func - - def test_file_isplay(self): - for handler in (self.dialog.idle_credits, - self.dialog.idle_readme, - self.dialog.idle_news): - self.mbox.showerror.message = '' - self.view.called = False - handler() - self.assertEqual(self.mbox.showerror.message, '') - self.assertEqual(self.view.called, True) - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_hyperparser.py b/python/Lib/idlelib/idle_test/test_hyperparser.py deleted file mode 100644 index 0a1809d2f7..0000000000 --- a/python/Lib/idlelib/idle_test/test_hyperparser.py +++ /dev/null @@ -1,192 +0,0 @@ -"""Unittest for idlelib.HyperParser""" -import unittest -from test.test_support import requires -from Tkinter import Tk, Text -from idlelib.EditorWindow import EditorWindow -from idlelib.HyperParser import HyperParser - -class DummyEditwin: - def __init__(self, text): - self.text = text - self.indentwidth = 8 - self.tabwidth = 8 - self.context_use_ps1 = True - self.num_context_lines = 50, 500, 1000 - - _build_char_in_string_func = EditorWindow._build_char_in_string_func.im_func - is_char_in_string = EditorWindow.is_char_in_string.im_func - - -class HyperParserTest(unittest.TestCase): - code = ( - '"""This is a module docstring"""\n' - '# this line is a comment\n' - 'x = "this is a string"\n' - "y = 'this is also a string'\n" - 'l = [i for i in range(10)]\n' - 'm = [py*py for # comment\n' - ' py in l]\n' - 'x.__len__\n' - "z = ((r'asdf')+('a')))\n" - '[x for x in\n' - 'for = False\n' - ) - - @classmethod - def setUpClass(cls): - requires('gui') - 
cls.root = Tk() - cls.root.withdraw() - cls.text = Text(cls.root) - cls.editwin = DummyEditwin(cls.text) - - @classmethod - def tearDownClass(cls): - del cls.text, cls.editwin - cls.root.destroy() - del cls.root - - def setUp(self): - self.text.insert('insert', self.code) - - def tearDown(self): - self.text.delete('1.0', 'end') - self.editwin.context_use_ps1 = True - - def get_parser(self, index): - """ - Return a parser object with index at 'index' - """ - return HyperParser(self.editwin, index) - - def test_init(self): - """ - test corner cases in the init method - """ - with self.assertRaises(ValueError) as ve: - self.text.tag_add('console', '1.0', '1.end') - p = self.get_parser('1.5') - self.assertIn('precedes', str(ve.exception)) - - # test without ps1 - self.editwin.context_use_ps1 = False - - # number of lines lesser than 50 - p = self.get_parser('end') - self.assertEqual(p.rawtext, self.text.get('1.0', 'end')) - - # number of lines greater than 50 - self.text.insert('end', self.text.get('1.0', 'end')*4) - p = self.get_parser('54.5') - - def test_is_in_string(self): - get = self.get_parser - - p = get('1.0') - self.assertFalse(p.is_in_string()) - p = get('1.4') - self.assertTrue(p.is_in_string()) - p = get('2.3') - self.assertFalse(p.is_in_string()) - p = get('3.3') - self.assertFalse(p.is_in_string()) - p = get('3.7') - self.assertTrue(p.is_in_string()) - p = get('4.6') - self.assertTrue(p.is_in_string()) - - def test_is_in_code(self): - get = self.get_parser - - p = get('1.0') - self.assertTrue(p.is_in_code()) - p = get('1.1') - self.assertFalse(p.is_in_code()) - p = get('2.5') - self.assertFalse(p.is_in_code()) - p = get('3.4') - self.assertTrue(p.is_in_code()) - p = get('3.6') - self.assertFalse(p.is_in_code()) - p = get('4.14') - self.assertFalse(p.is_in_code()) - - def test_get_surrounding_bracket(self): - get = self.get_parser - - def without_mustclose(parser): - # a utility function to get surrounding bracket - # with mustclose=False - return 
parser.get_surrounding_brackets(mustclose=False) - - def with_mustclose(parser): - # a utility function to get surrounding bracket - # with mustclose=True - return parser.get_surrounding_brackets(mustclose=True) - - p = get('3.2') - self.assertIsNone(with_mustclose(p)) - self.assertIsNone(without_mustclose(p)) - - p = get('5.6') - self.assertTupleEqual(without_mustclose(p), ('5.4', '5.25')) - self.assertTupleEqual(without_mustclose(p), with_mustclose(p)) - - p = get('5.23') - self.assertTupleEqual(without_mustclose(p), ('5.21', '5.24')) - self.assertTupleEqual(without_mustclose(p), with_mustclose(p)) - - p = get('6.15') - self.assertTupleEqual(without_mustclose(p), ('6.4', '6.end')) - self.assertIsNone(with_mustclose(p)) - - p = get('9.end') - self.assertIsNone(with_mustclose(p)) - self.assertIsNone(without_mustclose(p)) - - def test_get_expression(self): - get = self.get_parser - - p = get('4.2') - self.assertEqual(p.get_expression(), 'y ') - - p = get('4.7') - with self.assertRaises(ValueError) as ve: - p.get_expression() - self.assertIn('is inside a code', str(ve.exception)) - - p = get('5.25') - self.assertEqual(p.get_expression(), 'range(10)') - - p = get('6.7') - self.assertEqual(p.get_expression(), 'py') - - p = get('6.8') - self.assertEqual(p.get_expression(), '') - - p = get('7.9') - self.assertEqual(p.get_expression(), 'py') - - p = get('8.end') - self.assertEqual(p.get_expression(), 'x.__len__') - - p = get('9.13') - self.assertEqual(p.get_expression(), "r'asdf'") - - p = get('9.17') - with self.assertRaises(ValueError) as ve: - p.get_expression() - self.assertIn('is inside a code', str(ve.exception)) - - p = get('10.0') - self.assertEqual(p.get_expression(), '') - - p = get('11.3') - self.assertEqual(p.get_expression(), '') - - p = get('11.11') - self.assertEqual(p.get_expression(), 'False') - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_idlehistory.py 
b/python/Lib/idlelib/idle_test/test_idlehistory.py deleted file mode 100644 index b0767570fc..0000000000 --- a/python/Lib/idlelib/idle_test/test_idlehistory.py +++ /dev/null @@ -1,168 +0,0 @@ -import unittest -from test.test_support import requires - -import Tkinter as tk -from Tkinter import Text as tkText -from idlelib.idle_test.mock_tk import Text as mkText -from idlelib.IdleHistory import History -from idlelib.configHandler import idleConf - -line1 = 'a = 7' -line2 = 'b = a' - -class StoreTest(unittest.TestCase): - '''Tests History.__init__ and History.store with mock Text''' - - @classmethod - def setUpClass(cls): - cls.text = mkText() - cls.history = History(cls.text) - - def tearDown(self): - self.text.delete('1.0', 'end') - self.history.history = [] - - def test_init(self): - self.assertIs(self.history.text, self.text) - self.assertEqual(self.history.history, []) - self.assertIsNone(self.history.prefix) - self.assertIsNone(self.history.pointer) - self.assertEqual(self.history.cyclic, - idleConf.GetOption("main", "History", "cyclic", 1, "bool")) - - def test_store_short(self): - self.history.store('a') - self.assertEqual(self.history.history, []) - self.history.store(' a ') - self.assertEqual(self.history.history, []) - - def test_store_dup(self): - self.history.store(line1) - self.assertEqual(self.history.history, [line1]) - self.history.store(line2) - self.assertEqual(self.history.history, [line1, line2]) - self.history.store(line1) - self.assertEqual(self.history.history, [line2, line1]) - - def test_store_reset(self): - self.history.prefix = line1 - self.history.pointer = 0 - self.history.store(line2) - self.assertIsNone(self.history.prefix) - self.assertIsNone(self.history.pointer) - - -class TextWrapper: - def __init__(self, master): - self.text = tkText(master=master) - self._bell = False - def __getattr__(self, name): - return getattr(self.text, name) - def bell(self): - self._bell = True - -class FetchTest(unittest.TestCase): - '''Test History.fetch 
with wrapped tk.Text. - ''' - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = tk.Tk() - cls.root.withdraw() - - def setUp(self): - self.text = text = TextWrapper(self.root) - text.insert('1.0', ">>> ") - text.mark_set('iomark', '1.4') - text.mark_gravity('iomark', 'left') - self.history = History(text) - self.history.history = [line1, line2] - - @classmethod - def tearDownClass(cls): - cls.root.destroy() - del cls.root - - def fetch_test(self, reverse, line, prefix, index, bell=False): - # Perform one fetch as invoked by Alt-N or Alt-P - # Test the result. The line test is the most important. - # The last two are diagnostic of fetch internals. - History = self.history - History.fetch(reverse) - - Equal = self.assertEqual - Equal(self.text.get('iomark', 'end-1c'), line) - Equal(self.text._bell, bell) - if bell: - self.text._bell = False - Equal(History.prefix, prefix) - Equal(History.pointer, index) - Equal(self.text.compare("insert", '==', "end-1c"), 1) - - def test_fetch_prev_cyclic(self): - prefix = '' - test = self.fetch_test - test(True, line2, prefix, 1) - test(True, line1, prefix, 0) - test(True, prefix, None, None, bell=True) - - def test_fetch_next_cyclic(self): - prefix = '' - test = self.fetch_test - test(False, line1, prefix, 0) - test(False, line2, prefix, 1) - test(False, prefix, None, None, bell=True) - - # Prefix 'a' tests skip line2, which starts with 'b' - def test_fetch_prev_prefix(self): - prefix = 'a' - self.text.insert('iomark', prefix) - self.fetch_test(True, line1, prefix, 0) - self.fetch_test(True, prefix, None, None, bell=True) - - def test_fetch_next_prefix(self): - prefix = 'a' - self.text.insert('iomark', prefix) - self.fetch_test(False, line1, prefix, 0) - self.fetch_test(False, prefix, None, None, bell=True) - - def test_fetch_prev_noncyclic(self): - prefix = '' - self.history.cyclic = False - test = self.fetch_test - test(True, line2, prefix, 1) - test(True, line1, prefix, 0) - test(True, line1, prefix, 0, 
bell=True) - - def test_fetch_next_noncyclic(self): - prefix = '' - self.history.cyclic = False - test = self.fetch_test - test(False, prefix, None, None, bell=True) - test(True, line2, prefix, 1) - test(False, prefix, None, None, bell=True) - test(False, prefix, None, None, bell=True) - - def test_fetch_cursor_move(self): - # Move cursor after fetch - self.history.fetch(reverse=True) # initialization - self.text.mark_set('insert', 'iomark') - self.fetch_test(True, line2, None, None, bell=True) - - def test_fetch_edit(self): - # Edit after fetch - self.history.fetch(reverse=True) # initialization - self.text.delete('iomark', 'insert', ) - self.text.insert('iomark', 'a =') - self.fetch_test(True, line1, 'a =', 0) # prefix is reset - - def test_history_prev_next(self): - # Minimally test functions bound to events - self.history.history_prev('dummy event') - self.assertEqual(self.history.pointer, 1) - self.history.history_next('dummy event') - self.assertEqual(self.history.pointer, None) - - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=2) diff --git a/python/Lib/idlelib/idle_test/test_io.py b/python/Lib/idlelib/idle_test/test_io.py deleted file mode 100644 index ee017bb8c6..0000000000 --- a/python/Lib/idlelib/idle_test/test_io.py +++ /dev/null @@ -1,267 +0,0 @@ -import unittest -import io -from idlelib.PyShell import PseudoInputFile, PseudoOutputFile -from test import test_support as support - - -class Base(object): - def __str__(self): - return '%s:str' % type(self).__name__ - def __unicode__(self): - return '%s:unicode' % type(self).__name__ - def __len__(self): - return 3 - def __iter__(self): - return iter('abc') - def __getitem__(self, *args): - return '%s:item' % type(self).__name__ - def __getslice__(self, *args): - return '%s:slice' % type(self).__name__ - -class S(Base, str): - pass - -class U(Base, unicode): - pass - -class BA(Base, bytearray): - pass - -class MockShell: - def __init__(self): - self.reset() - - def write(self, *args): - 
self.written.append(args) - - def readline(self): - return self.lines.pop() - - def close(self): - pass - - def reset(self): - self.written = [] - - def push(self, lines): - self.lines = list(lines)[::-1] - - -class PseudeOutputFilesTest(unittest.TestCase): - def test_misc(self): - shell = MockShell() - f = PseudoOutputFile(shell, 'stdout', 'utf-8') - self.assertIsInstance(f, io.TextIOBase) - self.assertEqual(f.encoding, 'utf-8') - self.assertIsNone(f.errors) - self.assertIsNone(f.newlines) - self.assertEqual(f.name, '') - self.assertFalse(f.closed) - self.assertTrue(f.isatty()) - self.assertFalse(f.readable()) - self.assertTrue(f.writable()) - self.assertFalse(f.seekable()) - - def test_unsupported(self): - shell = MockShell() - f = PseudoOutputFile(shell, 'stdout', 'utf-8') - self.assertRaises(IOError, f.fileno) - self.assertRaises(IOError, f.tell) - self.assertRaises(IOError, f.seek, 0) - self.assertRaises(IOError, f.read, 0) - self.assertRaises(IOError, f.readline, 0) - - def test_write(self): - shell = MockShell() - f = PseudoOutputFile(shell, 'stdout', 'utf-8') - f.write('test') - self.assertEqual(shell.written, [('test', 'stdout')]) - shell.reset() - f.write('t\xe8st') - self.assertEqual(shell.written, [('t\xe8st', 'stdout')]) - shell.reset() - f.write(u't\xe8st') - self.assertEqual(shell.written, [(u't\xe8st', 'stdout')]) - shell.reset() - - f.write(S('t\xe8st')) - self.assertEqual(shell.written, [('t\xe8st', 'stdout')]) - self.assertEqual(type(shell.written[0][0]), str) - shell.reset() - f.write(BA('t\xe8st')) - self.assertEqual(shell.written, [('t\xe8st', 'stdout')]) - self.assertEqual(type(shell.written[0][0]), str) - shell.reset() - f.write(U(u't\xe8st')) - self.assertEqual(shell.written, [(u't\xe8st', 'stdout')]) - self.assertEqual(type(shell.written[0][0]), unicode) - shell.reset() - - self.assertRaises(TypeError, f.write) - self.assertEqual(shell.written, []) - self.assertRaises(TypeError, f.write, 123) - self.assertEqual(shell.written, []) - 
self.assertRaises(TypeError, f.write, 'test', 'spam') - self.assertEqual(shell.written, []) - - def test_writelines(self): - shell = MockShell() - f = PseudoOutputFile(shell, 'stdout', 'utf-8') - f.writelines([]) - self.assertEqual(shell.written, []) - shell.reset() - f.writelines(['one\n', 'two']) - self.assertEqual(shell.written, - [('one\n', 'stdout'), ('two', 'stdout')]) - shell.reset() - f.writelines(['on\xe8\n', 'tw\xf2']) - self.assertEqual(shell.written, - [('on\xe8\n', 'stdout'), ('tw\xf2', 'stdout')]) - shell.reset() - f.writelines([u'on\xe8\n', u'tw\xf2']) - self.assertEqual(shell.written, - [(u'on\xe8\n', 'stdout'), (u'tw\xf2', 'stdout')]) - shell.reset() - - f.writelines([S('t\xe8st')]) - self.assertEqual(shell.written, [('t\xe8st', 'stdout')]) - self.assertEqual(type(shell.written[0][0]), str) - shell.reset() - f.writelines([BA('t\xe8st')]) - self.assertEqual(shell.written, [('t\xe8st', 'stdout')]) - self.assertEqual(type(shell.written[0][0]), str) - shell.reset() - f.writelines([U(u't\xe8st')]) - self.assertEqual(shell.written, [(u't\xe8st', 'stdout')]) - self.assertEqual(type(shell.written[0][0]), unicode) - shell.reset() - - self.assertRaises(TypeError, f.writelines) - self.assertEqual(shell.written, []) - self.assertRaises(TypeError, f.writelines, 123) - self.assertEqual(shell.written, []) - self.assertRaises(TypeError, f.writelines, [123]) - self.assertEqual(shell.written, []) - self.assertRaises(TypeError, f.writelines, [], []) - self.assertEqual(shell.written, []) - - def test_close(self): - shell = MockShell() - f = PseudoOutputFile(shell, 'stdout', 'utf-8') - self.assertFalse(f.closed) - f.write('test') - f.close() - self.assertTrue(f.closed) - self.assertRaises(ValueError, f.write, 'x') - self.assertEqual(shell.written, [('test', 'stdout')]) - f.close() - self.assertRaises(TypeError, f.close, 1) - - -class PseudeInputFilesTest(unittest.TestCase): - def test_misc(self): - shell = MockShell() - f = PseudoInputFile(shell, 'stdin', 'utf-8') - 
self.assertIsInstance(f, io.TextIOBase) - self.assertEqual(f.encoding, 'utf-8') - self.assertIsNone(f.errors) - self.assertIsNone(f.newlines) - self.assertEqual(f.name, '') - self.assertFalse(f.closed) - self.assertTrue(f.isatty()) - self.assertTrue(f.readable()) - self.assertFalse(f.writable()) - self.assertFalse(f.seekable()) - - def test_unsupported(self): - shell = MockShell() - f = PseudoInputFile(shell, 'stdin', 'utf-8') - self.assertRaises(IOError, f.fileno) - self.assertRaises(IOError, f.tell) - self.assertRaises(IOError, f.seek, 0) - self.assertRaises(IOError, f.write, 'x') - self.assertRaises(IOError, f.writelines, ['x']) - - def test_read(self): - shell = MockShell() - f = PseudoInputFile(shell, 'stdin', 'utf-8') - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.read(), 'one\ntwo\n') - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.read(-1), 'one\ntwo\n') - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.read(None), 'one\ntwo\n') - shell.push(['one\n', 'two\n', 'three\n', '']) - self.assertEqual(f.read(2), 'on') - self.assertEqual(f.read(3), 'e\nt') - self.assertEqual(f.read(10), 'wo\nthree\n') - - shell.push(['one\n', 'two\n']) - self.assertEqual(f.read(0), '') - self.assertRaises(TypeError, f.read, 1.5) - self.assertRaises(TypeError, f.read, '1') - self.assertRaises(TypeError, f.read, 1, 1) - - def test_readline(self): - shell = MockShell() - f = PseudoInputFile(shell, 'stdin', 'utf-8') - shell.push(['one\n', 'two\n', 'three\n', 'four\n']) - self.assertEqual(f.readline(), 'one\n') - self.assertEqual(f.readline(-1), 'two\n') - self.assertEqual(f.readline(None), 'three\n') - shell.push(['one\ntwo\n']) - self.assertEqual(f.readline(), 'one\n') - self.assertEqual(f.readline(), 'two\n') - shell.push(['one', 'two', 'three']) - self.assertEqual(f.readline(), 'one') - self.assertEqual(f.readline(), 'two') - shell.push(['one\n', 'two\n', 'three\n']) - self.assertEqual(f.readline(2), 'on') - self.assertEqual(f.readline(1), 'e') - 
self.assertEqual(f.readline(1), '\n') - self.assertEqual(f.readline(10), 'two\n') - - shell.push(['one\n', 'two\n']) - self.assertEqual(f.readline(0), '') - self.assertRaises(TypeError, f.readlines, 1.5) - self.assertRaises(TypeError, f.readlines, '1') - self.assertRaises(TypeError, f.readlines, 1, 1) - - def test_readlines(self): - shell = MockShell() - f = PseudoInputFile(shell, 'stdin', 'utf-8') - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.readlines(), ['one\n', 'two\n']) - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.readlines(-1), ['one\n', 'two\n']) - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.readlines(None), ['one\n', 'two\n']) - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.readlines(0), ['one\n', 'two\n']) - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.readlines(3), ['one\n']) - shell.push(['one\n', 'two\n', '']) - self.assertEqual(f.readlines(4), ['one\n', 'two\n']) - - shell.push(['one\n', 'two\n', '']) - self.assertRaises(TypeError, f.readlines, 1.5) - self.assertRaises(TypeError, f.readlines, '1') - self.assertRaises(TypeError, f.readlines, 1, 1) - - def test_close(self): - shell = MockShell() - f = PseudoInputFile(shell, 'stdin', 'utf-8') - shell.push(['one\n', 'two\n', '']) - self.assertFalse(f.closed) - self.assertEqual(f.readline(), 'one\n') - f.close() - self.assertFalse(f.closed) - self.assertEqual(f.readline(), 'two\n') - self.assertRaises(TypeError, f.close, 1) - - -def test_main(): - support.run_unittest(PseudeOutputFilesTest, PseudeInputFilesTest) - -if __name__ == '__main__': - test_main() diff --git a/python/Lib/idlelib/idle_test/test_parenmatch.py b/python/Lib/idlelib/idle_test/test_parenmatch.py deleted file mode 100644 index 1621981540..0000000000 --- a/python/Lib/idlelib/idle_test/test_parenmatch.py +++ /dev/null @@ -1,121 +0,0 @@ -"""Test idlelib.ParenMatch.""" -# This must currently be a gui test because ParenMatch methods use -# several text methods not defined on 
idlelib.idle_test.mock_tk.Text. - -import unittest -from test.test_support import requires -from Tkinter import Tk, Text -from idlelib.ParenMatch import ParenMatch - -class Mock: # 2.7 does not have unittest.mock - def __init__(self, *args, **kwargs): - self.called = False - - def __call__(self, *args, **kwargs): - self.called = True - - def reset_mock(self, *args, **kwargs): - self.called = False - - def after(self, *args, **kwargs): - pass - -class DummyEditwin: - def __init__(self, text): - self.text = text - self.indentwidth = 8 - self.tabwidth = 8 - self.context_use_ps1 = True - - -class ParenMatchTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - cls.text = Text(cls.root) - cls.editwin = DummyEditwin(cls.text) - cls.editwin.text_frame = Mock() - - @classmethod - def tearDownClass(cls): - del cls.text, cls.editwin - cls.root.destroy() - del cls.root - - def tearDown(self): - self.text.delete('1.0', 'end') - - def test_paren_expression(self): - """ - Test ParenMatch with 'expression' style. - """ - text = self.text - pm = ParenMatch(self.editwin) - pm.set_style('expression') - - text.insert('insert', 'def foobar(a, b') - pm.flash_paren_event('event') - self.assertIn('<>', text.event_info()) - self.assertTupleEqual(text.tag_prevrange('paren', 'end'), - ('1.10', '1.15')) - text.insert('insert', ')') - pm.restore_event() - self.assertNotIn('<>', text.event_info()) - self.assertEqual(text.tag_prevrange('paren', 'end'), ()) - - # paren_closed_event can only be tested as below - pm.paren_closed_event('event') - self.assertTupleEqual(text.tag_prevrange('paren', 'end'), - ('1.10', '1.16')) - - def test_paren_default(self): - """ - Test ParenMatch with 'default' style. 
- """ - text = self.text - pm = ParenMatch(self.editwin) - pm.set_style('default') - - text.insert('insert', 'def foobar(a, b') - pm.flash_paren_event('event') - self.assertIn('<>', text.event_info()) - self.assertTupleEqual(text.tag_prevrange('paren', 'end'), - ('1.10', '1.11')) - text.insert('insert', ')') - pm.restore_event() - self.assertNotIn('<>', text.event_info()) - self.assertEqual(text.tag_prevrange('paren', 'end'), ()) - - def test_paren_corner(self): - """ - Test corner cases in flash_paren_event and paren_closed_event. - - These cases force conditional expression and alternate paths. - """ - text = self.text - pm = ParenMatch(self.editwin) - - text.insert('insert', '# this is a commen)') - self.assertIsNone(pm.paren_closed_event('event')) - - text.insert('insert', '\ndef') - self.assertIsNone(pm.flash_paren_event('event')) - self.assertIsNone(pm.paren_closed_event('event')) - - text.insert('insert', ' a, *arg)') - self.assertIsNone(pm.paren_closed_event('event')) - - def test_handle_restore_timer(self): - pm = ParenMatch(self.editwin) - pm.restore_event = Mock() - pm.handle_restore_timer(0) - self.assertTrue(pm.restore_event.called) - pm.restore_event.reset_mock() - pm.handle_restore_timer(1) - self.assertFalse(pm.restore_event.called) - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_pathbrowser.py b/python/Lib/idlelib/idle_test/test_pathbrowser.py deleted file mode 100644 index f02841481b..0000000000 --- a/python/Lib/idlelib/idle_test/test_pathbrowser.py +++ /dev/null @@ -1,28 +0,0 @@ -import unittest -import os -import sys -import idlelib -from idlelib import PathBrowser - -class PathBrowserTest(unittest.TestCase): - - def test_DirBrowserTreeItem(self): - # Issue16226 - make sure that getting a sublist works - d = PathBrowser.DirBrowserTreeItem('') - d.GetSubList() - self.assertEqual('', d.GetText()) - - dir = os.path.split(os.path.abspath(idlelib.__file__))[0] - 
self.assertEqual(d.ispackagedir(dir), True) - self.assertEqual(d.ispackagedir(dir + '/Icons'), False) - - def test_PathBrowserTreeItem(self): - p = PathBrowser.PathBrowserTreeItem() - self.assertEqual(p.GetText(), 'sys.path') - sub = p.GetSubList() - self.assertEqual(len(sub), len(sys.path)) - # Following fails in 2.7 because old-style class - #self.assertEqual(type(sub[0]), PathBrowser.DirBrowserTreeItem) - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=False) diff --git a/python/Lib/idlelib/idle_test/test_rstrip.py b/python/Lib/idlelib/idle_test/test_rstrip.py deleted file mode 100644 index 1c90b93d21..0000000000 --- a/python/Lib/idlelib/idle_test/test_rstrip.py +++ /dev/null @@ -1,49 +0,0 @@ -import unittest -import idlelib.RstripExtension as rs -from idlelib.idle_test.mock_idle import Editor - -class rstripTest(unittest.TestCase): - - def test_rstrip_line(self): - editor = Editor() - text = editor.text - do_rstrip = rs.RstripExtension(editor).do_rstrip - - do_rstrip() - self.assertEqual(text.get('1.0', 'insert'), '') - text.insert('1.0', ' ') - do_rstrip() - self.assertEqual(text.get('1.0', 'insert'), '') - text.insert('1.0', ' \n') - do_rstrip() - self.assertEqual(text.get('1.0', 'insert'), '\n') - - def test_rstrip_multiple(self): - editor = Editor() - # Uncomment following to verify that test passes with real widgets. 
-## from idlelib.EditorWindow import EditorWindow as Editor -## from tkinter import Tk -## editor = Editor(root=Tk()) - text = editor.text - do_rstrip = rs.RstripExtension(editor).do_rstrip - - original = ( - "Line with an ending tab \n" - "Line ending in 5 spaces \n" - "Linewithnospaces\n" - " indented line\n" - " indented line with trailing space \n" - " ") - stripped = ( - "Line with an ending tab\n" - "Line ending in 5 spaces\n" - "Linewithnospaces\n" - " indented line\n" - " indented line with trailing space\n") - - text.insert('1.0', original) - do_rstrip() - self.assertEqual(text.get('1.0', 'insert'), stripped) - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=False) diff --git a/python/Lib/idlelib/idle_test/test_searchdialogbase.py b/python/Lib/idlelib/idle_test/test_searchdialogbase.py deleted file mode 100644 index 32abfe6f79..0000000000 --- a/python/Lib/idlelib/idle_test/test_searchdialogbase.py +++ /dev/null @@ -1,164 +0,0 @@ -'''Unittests for idlelib/SearchDialogBase.py - -Coverage: 99%. The only thing not covered is inconsequential -- -testing skipping of suite when self.needwrapbutton is false. - -''' -import unittest -from test.test_support import requires -from Tkinter import Tk, Toplevel, Frame ## BooleanVar, StringVar -from idlelib import SearchEngine as se -from idlelib import SearchDialogBase as sdb -from idlelib.idle_test.mock_idle import Func -##from idlelib.idle_test.mock_tk import Var - -# The ## imports above & following could help make some tests gui-free.# However, they currently make radiobutton tests fail. -##def setUpModule(): -## # Replace tk objects used to initialize se.SearchEngine. 
-## se.BooleanVar = Var -## se.StringVar = Var -## -##def tearDownModule(): -## se.BooleanVar = BooleanVar -## se.StringVar = StringVar - -class SearchDialogBaseTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - - @classmethod - def tearDownClass(cls): - cls.root.destroy() - del cls.root - - def setUp(self): - self.engine = se.SearchEngine(self.root) # None also seems to work - self.dialog = sdb.SearchDialogBase(root=self.root, engine=self.engine) - - def tearDown(self): - self.dialog.close() - - def test_open_and_close(self): - # open calls create_widgets, which needs default_command - self.dialog.default_command = None - - # Since text parameter of .open is not used in base class, - # pass dummy 'text' instead of tk.Text(). - self.dialog.open('text') - self.assertEqual(self.dialog.top.state(), 'normal') - self.dialog.close() - self.assertEqual(self.dialog.top.state(), 'withdrawn') - - self.dialog.open('text', searchphrase="hello") - self.assertEqual(self.dialog.ent.get(), 'hello') - self.dialog.close() - - def test_create_widgets(self): - self.dialog.create_entries = Func() - self.dialog.create_option_buttons = Func() - self.dialog.create_other_buttons = Func() - self.dialog.create_command_buttons = Func() - - self.dialog.default_command = None - self.dialog.create_widgets() - - self.assertTrue(self.dialog.create_entries.called) - self.assertTrue(self.dialog.create_option_buttons.called) - self.assertTrue(self.dialog.create_other_buttons.called) - self.assertTrue(self.dialog.create_command_buttons.called) - - def test_make_entry(self): - equal = self.assertEqual - self.dialog.row = 0 - self.dialog.top = Toplevel(self.root) - entry, label = self.dialog.make_entry("Test:", 'hello') - equal(label['text'], 'Test:') - - self.assertIn(entry.get(), 'hello') - egi = entry.grid_info() - equal(int(egi['row']), 0) - equal(int(egi['column']), 1) - equal(int(egi['rowspan']), 1) - equal(int(egi['columnspan']), 1) - 
equal(self.dialog.row, 1) - - def test_create_entries(self): - self.dialog.row = 0 - self.engine.setpat('hello') - self.dialog.create_entries() - self.assertIn(self.dialog.ent.get(), 'hello') - - def test_make_frame(self): - self.dialog.row = 0 - self.dialog.top = Toplevel(self.root) - frame, label = self.dialog.make_frame() - self.assertEqual(label, '') - self.assertIsInstance(frame, Frame) - - frame, label = self.dialog.make_frame('testlabel') - self.assertEqual(label['text'], 'testlabel') - self.assertIsInstance(frame, Frame) - - def btn_test_setup(self, meth): - self.dialog.top = Toplevel(self.root) - self.dialog.row = 0 - return meth() - - def test_create_option_buttons(self): - e = self.engine - for state in (0, 1): - for var in (e.revar, e.casevar, e.wordvar, e.wrapvar): - var.set(state) - frame, options = self.btn_test_setup( - self.dialog.create_option_buttons) - for spec, button in zip (options, frame.pack_slaves()): - var, label = spec - self.assertEqual(button['text'], label) - self.assertEqual(var.get(), state) - if state == 1: - button.deselect() - else: - button.select() - self.assertEqual(var.get(), 1 - state) - - def test_create_other_buttons(self): - for state in (False, True): - var = self.engine.backvar - var.set(state) - frame, others = self.btn_test_setup( - self.dialog.create_other_buttons) - buttons = frame.pack_slaves() - for spec, button in zip(others, buttons): - val, label = spec - self.assertEqual(button['text'], label) - if val == state: - # hit other button, then this one - # indexes depend on button order - self.assertEqual(var.get(), state) - buttons[val].select() - self.assertEqual(var.get(), 1 - state) - buttons[1-val].select() - self.assertEqual(var.get(), state) - - def test_make_button(self): - self.dialog.top = Toplevel(self.root) - self.dialog.buttonframe = Frame(self.dialog.top) - btn = self.dialog.make_button('Test', self.dialog.close) - self.assertEqual(btn['text'], 'Test') - - def test_create_command_buttons(self): - 
self.dialog.create_command_buttons() - # Look for close button command in buttonframe - closebuttoncommand = '' - for child in self.dialog.buttonframe.winfo_children(): - if child['text'] == 'close': - closebuttoncommand = child['command'] - self.assertIn('close', closebuttoncommand) - - - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=2) diff --git a/python/Lib/idlelib/idle_test/test_searchengine.py b/python/Lib/idlelib/idle_test/test_searchengine.py deleted file mode 100644 index 8bf9d4728a..0000000000 --- a/python/Lib/idlelib/idle_test/test_searchengine.py +++ /dev/null @@ -1,329 +0,0 @@ -'''Test functions and SearchEngine class in SearchEngine.py.''' - -# With mock replacements, the module does not use any gui widgets. -# The use of tk.Text is avoided (for now, until mock Text is improved) -# by patching instances with an index function returning what is needed. -# This works because mock Text.get does not use .index. - -import re -import unittest -#from test.test_support import requires -from Tkinter import BooleanVar, StringVar, TclError # ,Tk, Text -import tkMessageBox -from idlelib import SearchEngine as se -from idlelib.idle_test.mock_tk import Var, Mbox -from idlelib.idle_test.mock_tk import Text as mockText - -def setUpModule(): - # Replace s-e module tkinter imports other than non-gui TclError. - se.BooleanVar = Var - se.StringVar = Var - se.tkMessageBox = Mbox - -def tearDownModule(): - # Restore 'just in case', though other tests should also replace. - se.BooleanVar = BooleanVar - se.StringVar = StringVar - se.tkMessageBox = tkMessageBox - - -class Mock: - def __init__(self, *args, **kwargs): pass - -class GetTest(unittest.TestCase): - # SearchEngine.get returns singleton created & saved on first call. 
- def test_get(self): - saved_Engine = se.SearchEngine - se.SearchEngine = Mock # monkey-patch class - try: - root = Mock() - engine = se.get(root) - self.assertIsInstance(engine, se.SearchEngine) - self.assertIs(root._searchengine, engine) - self.assertIs(se.get(root), engine) - finally: - se.SearchEngine = saved_Engine # restore class to module - -class GetLineColTest(unittest.TestCase): - # Test simple text-independent helper function - def test_get_line_col(self): - self.assertEqual(se.get_line_col('1.0'), (1, 0)) - self.assertEqual(se.get_line_col('1.11'), (1, 11)) - - self.assertRaises(ValueError, se.get_line_col, ('1.0 lineend')) - self.assertRaises(ValueError, se.get_line_col, ('end')) - -class GetSelectionTest(unittest.TestCase): - # Test text-dependent helper function. -## # Need gui for text.index('sel.first/sel.last/insert'). -## @classmethod -## def setUpClass(cls): -## requires('gui') -## cls.root = Tk() -## -## @classmethod -## def tearDownClass(cls): -## cls.root.destroy() -## del cls.root - - def test_get_selection(self): - # text = Text(master=self.root) - text = mockText() - text.insert('1.0', 'Hello World!') - - # fix text.index result when called in get_selection - def sel(s): - # select entire text, cursor irrelevant - if s == 'sel.first': return '1.0' - if s == 'sel.last': return '1.12' - raise TclError - text.index = sel # replaces .tag_add('sel', '1.0, '1.12') - self.assertEqual(se.get_selection(text), ('1.0', '1.12')) - - def mark(s): - # no selection, cursor after 'Hello' - if s == 'insert': return '1.5' - raise TclError - text.index = mark # replaces .mark_set('insert', '1.5') - self.assertEqual(se.get_selection(text), ('1.5', '1.5')) - - -class ReverseSearchTest(unittest.TestCase): - # Test helper function that searches backwards within a line. - def test_search_reverse(self): - Equal = self.assertEqual - line = "Here is an 'is' test text." 
- prog = re.compile('is') - Equal(se.search_reverse(prog, line, len(line)).span(), (12, 14)) - Equal(se.search_reverse(prog, line, 14).span(), (12, 14)) - Equal(se.search_reverse(prog, line, 13).span(), (5, 7)) - Equal(se.search_reverse(prog, line, 7).span(), (5, 7)) - Equal(se.search_reverse(prog, line, 6), None) - - -class SearchEngineTest(unittest.TestCase): - # Test class methods that do not use Text widget. - - def setUp(self): - self.engine = se.SearchEngine(root=None) - # Engine.root is only used to create error message boxes. - # The mock replacement ignores the root argument. - - def test_is_get(self): - engine = self.engine - Equal = self.assertEqual - - Equal(engine.getpat(), '') - engine.setpat('hello') - Equal(engine.getpat(), 'hello') - - Equal(engine.isre(), False) - engine.revar.set(1) - Equal(engine.isre(), True) - - Equal(engine.iscase(), False) - engine.casevar.set(1) - Equal(engine.iscase(), True) - - Equal(engine.isword(), False) - engine.wordvar.set(1) - Equal(engine.isword(), True) - - Equal(engine.iswrap(), True) - engine.wrapvar.set(0) - Equal(engine.iswrap(), False) - - Equal(engine.isback(), False) - engine.backvar.set(1) - Equal(engine.isback(), True) - - def test_setcookedpat(self): - engine = self.engine - engine.setcookedpat('\s') - self.assertEqual(engine.getpat(), '\s') - engine.revar.set(1) - engine.setcookedpat('\s') - self.assertEqual(engine.getpat(), r'\\s') - - def test_getcookedpat(self): - engine = self.engine - Equal = self.assertEqual - - Equal(engine.getcookedpat(), '') - engine.setpat('hello') - Equal(engine.getcookedpat(), 'hello') - engine.wordvar.set(True) - Equal(engine.getcookedpat(), r'\bhello\b') - engine.wordvar.set(False) - - engine.setpat('\s') - Equal(engine.getcookedpat(), r'\\s') - engine.revar.set(True) - Equal(engine.getcookedpat(), '\s') - - def test_getprog(self): - engine = self.engine - Equal = self.assertEqual - - engine.setpat('Hello') - temppat = engine.getprog() - Equal(temppat.pattern, 
re.compile('Hello', re.IGNORECASE).pattern) - engine.casevar.set(1) - temppat = engine.getprog() - Equal(temppat.pattern, re.compile('Hello').pattern, 0) - - engine.setpat('') - Equal(engine.getprog(), None) - engine.setpat('+') - engine.revar.set(1) - Equal(engine.getprog(), None) - self.assertEqual(Mbox.showerror.message, - 'Error: nothing to repeat\nPattern: +') - - def test_report_error(self): - showerror = Mbox.showerror - Equal = self.assertEqual - pat = '[a-z' - msg = 'unexpected end of regular expression' - - Equal(self.engine.report_error(pat, msg), None) - Equal(showerror.title, 'Regular expression error') - expected_message = ("Error: " + msg + "\nPattern: [a-z") - Equal(showerror.message, expected_message) - - Equal(self.engine.report_error(pat, msg, 5), None) - Equal(showerror.title, 'Regular expression error') - expected_message += "\nOffset: 5" - Equal(showerror.message, expected_message) - - -class SearchTest(unittest.TestCase): - # Test that search_text makes right call to right method. 
- - @classmethod - def setUpClass(cls): -## requires('gui') -## cls.root = Tk() -## cls.text = Text(master=cls.root) - cls.text = mockText() - test_text = ( - 'First line\n' - 'Line with target\n' - 'Last line\n') - cls.text.insert('1.0', test_text) - cls.pat = re.compile('target') - - cls.engine = se.SearchEngine(None) - cls.engine.search_forward = lambda *args: ('f', args) - cls.engine.search_backward = lambda *args: ('b', args) - -## @classmethod -## def tearDownClass(cls): -## cls.root.destroy() -## del cls.root - - def test_search(self): - Equal = self.assertEqual - engine = self.engine - search = engine.search_text - text = self.text - pat = self.pat - - engine.patvar.set(None) - #engine.revar.set(pat) - Equal(search(text), None) - - def mark(s): - # no selection, cursor after 'Hello' - if s == 'insert': return '1.5' - raise TclError - text.index = mark - Equal(search(text, pat), ('f', (text, pat, 1, 5, True, False))) - engine.wrapvar.set(False) - Equal(search(text, pat), ('f', (text, pat, 1, 5, False, False))) - engine.wrapvar.set(True) - engine.backvar.set(True) - Equal(search(text, pat), ('b', (text, pat, 1, 5, True, False))) - engine.backvar.set(False) - - def sel(s): - if s == 'sel.first': return '2.10' - if s == 'sel.last': return '2.16' - raise TclError - text.index = sel - Equal(search(text, pat), ('f', (text, pat, 2, 16, True, False))) - Equal(search(text, pat, True), ('f', (text, pat, 2, 10, True, True))) - engine.backvar.set(True) - Equal(search(text, pat), ('b', (text, pat, 2, 10, True, False))) - Equal(search(text, pat, True), ('b', (text, pat, 2, 16, True, True))) - - -class ForwardBackwardTest(unittest.TestCase): - # Test that search_forward method finds the target. 
-## @classmethod -## def tearDownClass(cls): -## cls.root.destroy() -## del cls.root - - @classmethod - def setUpClass(cls): - cls.engine = se.SearchEngine(None) -## requires('gui') -## cls.root = Tk() -## cls.text = Text(master=cls.root) - cls.text = mockText() - # search_backward calls index('end-1c') - cls.text.index = lambda index: '4.0' - test_text = ( - 'First line\n' - 'Line with target\n' - 'Last line\n') - cls.text.insert('1.0', test_text) - cls.pat = re.compile('target') - cls.res = (2, (10, 16)) # line, slice indexes of 'target' - cls.failpat = re.compile('xyz') # not in text - cls.emptypat = re.compile('\w*') # empty match possible - - def make_search(self, func): - def search(pat, line, col, wrap, ok=0): - res = func(self.text, pat, line, col, wrap, ok) - # res is (line, matchobject) or None - return (res[0], res[1].span()) if res else res - return search - - def test_search_forward(self): - # search for non-empty match - Equal = self.assertEqual - forward = self.make_search(self.engine.search_forward) - pat = self.pat - Equal(forward(pat, 1, 0, True), self.res) - Equal(forward(pat, 3, 0, True), self.res) # wrap - Equal(forward(pat, 3, 0, False), None) # no wrap - Equal(forward(pat, 2, 10, False), self.res) - - Equal(forward(self.failpat, 1, 0, True), None) - Equal(forward(self.emptypat, 2, 9, True, ok=True), (2, (9, 9))) - #Equal(forward(self.emptypat, 2, 9, True), self.res) - # While the initial empty match is correctly ignored, skipping - # the rest of the line and returning (3, (0,4)) seems buggy - tjr. 
- Equal(forward(self.emptypat, 2, 10, True), self.res) - - def test_search_backward(self): - # search for non-empty match - Equal = self.assertEqual - backward = self.make_search(self.engine.search_backward) - pat = self.pat - Equal(backward(pat, 3, 5, True), self.res) - Equal(backward(pat, 2, 0, True), self.res) # wrap - Equal(backward(pat, 2, 0, False), None) # no wrap - Equal(backward(pat, 2, 16, False), self.res) - - Equal(backward(self.failpat, 3, 9, True), None) - Equal(backward(self.emptypat, 2, 10, True, ok=True), (2, (9,9))) - # Accepted because 9 < 10, not because ok=True. - # It is not clear that ok=True is useful going back - tjr - Equal(backward(self.emptypat, 2, 9, True), (2, (5, 9))) - - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=2) diff --git a/python/Lib/idlelib/idle_test/test_text.py b/python/Lib/idlelib/idle_test/test_text.py deleted file mode 100644 index 50d3facec7..0000000000 --- a/python/Lib/idlelib/idle_test/test_text.py +++ /dev/null @@ -1,227 +0,0 @@ -# Test mock_tk.Text class against tkinter.Text class by running same tests with both. 
-import unittest -from test.test_support import requires - -from _tkinter import TclError - -class TextTest(object): - - hw = 'hello\nworld' # usual initial insert after initialization - hwn = hw+'\n' # \n present at initialization, before insert - - Text = None - def setUp(self): - self.text = self.Text() - - def test_init(self): - self.assertEqual(self.text.get('1.0'), '\n') - self.assertEqual(self.text.get('end'), '') - - def test_index_empty(self): - index = self.text.index - - for dex in (-1.0, 0.3, '1.-1', '1.0', '1.0 lineend', '1.end', '1.33', - 'insert'): - self.assertEqual(index(dex), '1.0') - - for dex in 'end', 2.0, '2.1', '33.44': - self.assertEqual(index(dex), '2.0') - - def test_index_data(self): - index = self.text.index - self.text.insert('1.0', self.hw) - - for dex in -1.0, 0.3, '1.-1', '1.0': - self.assertEqual(index(dex), '1.0') - - for dex in '1.0 lineend', '1.end', '1.33': - self.assertEqual(index(dex), '1.5') - - for dex in 'end', '33.44': - self.assertEqual(index(dex), '3.0') - - def test_get(self): - get = self.text.get - Equal = self.assertEqual - self.text.insert('1.0', self.hw) - - Equal(get('end'), '') - Equal(get('end', 'end'), '') - Equal(get('1.0'), 'h') - Equal(get('1.0', '1.1'), 'h') - Equal(get('1.0', '1.3'), 'hel') - Equal(get('1.1', '1.3'), 'el') - Equal(get('1.0', '1.0 lineend'), 'hello') - Equal(get('1.0', '1.10'), 'hello') - Equal(get('1.0 lineend'), '\n') - Equal(get('1.1', '2.3'), 'ello\nwor') - Equal(get('1.0', '2.5'), self.hw) - Equal(get('1.0', 'end'), self.hwn) - Equal(get('0.0', '5.0'), self.hwn) - - def test_insert(self): - insert = self.text.insert - get = self.text.get - Equal = self.assertEqual - - insert('1.0', self.hw) - Equal(get('1.0', 'end'), self.hwn) - - insert('1.0', '') # nothing - Equal(get('1.0', 'end'), self.hwn) - - insert('1.0', '*') - Equal(get('1.0', 'end'), '*hello\nworld\n') - - insert('1.0 lineend', '*') - Equal(get('1.0', 'end'), '*hello*\nworld\n') - - insert('2.3', '*') - Equal(get('1.0', 
'end'), '*hello*\nwor*ld\n') - - insert('end', 'x') - Equal(get('1.0', 'end'), '*hello*\nwor*ldx\n') - - insert('1.4', 'x\n') - Equal(get('1.0', 'end'), '*helx\nlo*\nwor*ldx\n') - - def test_no_delete(self): - # if index1 == 'insert' or 'end' or >= end, there is no deletion - delete = self.text.delete - get = self.text.get - Equal = self.assertEqual - self.text.insert('1.0', self.hw) - - delete('insert') - Equal(get('1.0', 'end'), self.hwn) - - delete('end') - Equal(get('1.0', 'end'), self.hwn) - - delete('insert', 'end') - Equal(get('1.0', 'end'), self.hwn) - - delete('insert', '5.5') - Equal(get('1.0', 'end'), self.hwn) - - delete('1.4', '1.0') - Equal(get('1.0', 'end'), self.hwn) - - delete('1.4', '1.4') - Equal(get('1.0', 'end'), self.hwn) - - def test_delete_char(self): - delete = self.text.delete - get = self.text.get - Equal = self.assertEqual - self.text.insert('1.0', self.hw) - - delete('1.0') - Equal(get('1.0', '1.end'), 'ello') - - delete('1.0', '1.1') - Equal(get('1.0', '1.end'), 'llo') - - # delete \n and combine 2 lines into 1 - delete('1.end') - Equal(get('1.0', '1.end'), 'lloworld') - - self.text.insert('1.3', '\n') - delete('1.10') - Equal(get('1.0', '1.end'), 'lloworld') - - self.text.insert('1.3', '\n') - delete('1.3', '2.0') - Equal(get('1.0', '1.end'), 'lloworld') - - def test_delete_slice(self): - delete = self.text.delete - get = self.text.get - Equal = self.assertEqual - self.text.insert('1.0', self.hw) - - delete('1.0', '1.0 lineend') - Equal(get('1.0', 'end'), '\nworld\n') - - delete('1.0', 'end') - Equal(get('1.0', 'end'), '\n') - - self.text.insert('1.0', self.hw) - delete('1.0', '2.0') - Equal(get('1.0', 'end'), 'world\n') - - delete('1.0', 'end') - Equal(get('1.0', 'end'), '\n') - - self.text.insert('1.0', self.hw) - delete('1.2', '2.3') - Equal(get('1.0', 'end'), 'held\n') - - def test_multiple_lines(self): # insert and delete - self.text.insert('1.0', 'hello') - - self.text.insert('1.3', '1\n2\n3\n4\n5') - 
self.assertEqual(self.text.get('1.0', 'end'), 'hel1\n2\n3\n4\n5lo\n') - - self.text.delete('1.3', '5.1') - self.assertEqual(self.text.get('1.0', 'end'), 'hello\n') - - def test_compare(self): - compare = self.text.compare - Equal = self.assertEqual - # need data so indexes not squished to 1,0 - self.text.insert('1.0', 'First\nSecond\nThird\n') - - self.assertRaises(TclError, compare, '2.2', 'op', '2.2') - - for op, less1, less0, equal, greater0, greater1 in ( - ('<', True, True, False, False, False), - ('<=', True, True, True, False, False), - ('>', False, False, False, True, True), - ('>=', False, False, True, True, True), - ('==', False, False, True, False, False), - ('!=', True, True, False, True, True), - ): - Equal(compare('1.1', op, '2.2'), less1, op) - Equal(compare('2.1', op, '2.2'), less0, op) - Equal(compare('2.2', op, '2.2'), equal, op) - Equal(compare('2.3', op, '2.2'), greater0, op) - Equal(compare('3.3', op, '2.2'), greater1, op) - - -class MockTextTest(TextTest, unittest.TestCase): - - @classmethod - def setUpClass(cls): - from idlelib.idle_test.mock_tk import Text - cls.Text = Text - - def test_decode(self): - # test endflags (-1, 0) not tested by test_index (which uses +1) - decode = self.text._decode - Equal = self.assertEqual - self.text.insert('1.0', self.hw) - - Equal(decode('end', -1), (2, 5)) - Equal(decode('3.1', -1), (2, 5)) - Equal(decode('end', 0), (2, 6)) - Equal(decode('3.1', 0), (2, 6)) - - -class TkTextTest(TextTest, unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - from Tkinter import Tk, Text - cls.Text = Text - cls.root = Tk() - - @classmethod - def tearDownClass(cls): - cls.root.destroy() - del cls.root - - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=False) diff --git a/python/Lib/idlelib/idle_test/test_textview.py b/python/Lib/idlelib/idle_test/test_textview.py deleted file mode 100644 index fa437fcb6e..0000000000 --- a/python/Lib/idlelib/idle_test/test_textview.py +++ /dev/null @@ 
-1,96 +0,0 @@ -'''Test the functions and main class method of textView.py.''' - -import unittest -import os -from test.test_support import requires -from Tkinter import Tk -from idlelib import textView as tv -from idlelib.idle_test.mock_idle import Func -from idlelib.idle_test.mock_tk import Mbox - - -class TV(tv.TextViewer): # Use in TextViewTest - transient = Func() - grab_set = Func() - wait_window = Func() - -class textviewClassTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - cls.root.withdraw() - - @classmethod - def tearDownClass(cls): - cls.root.destroy() - del cls.root - - def setUp(self): - TV.transient.__init__() - TV.grab_set.__init__() - TV.wait_window.__init__() - - def test_init_modal(self): - view = TV(self.root, 'Title', 'test text') - self.assertTrue(TV.transient.called) - self.assertTrue(TV.grab_set.called) - self.assertTrue(TV.wait_window.called) - view.Ok() - - def test_init_nonmodal(self): - view = TV(self.root, 'Title', 'test text', modal=False) - self.assertFalse(TV.transient.called) - self.assertFalse(TV.grab_set.called) - self.assertFalse(TV.wait_window.called) - view.Ok() - - def test_ok(self): - view = TV(self.root, 'Title', 'test text', modal=False) - view.destroy = Func() - view.Ok() - self.assertTrue(view.destroy.called) - del view.destroy # Unmask the real function. - view.destroy() - - -class ViewFunctionTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - cls.root.withdraw() - cls.orig_mbox = tv.tkMessageBox - tv.tkMessageBox = Mbox - - @classmethod - def tearDownClass(cls): - cls.root.destroy() - del cls.root - tv.tkMessageBox = cls.orig_mbox - del cls.orig_mbox - - def test_view_text(self): - # If modal True, get tkinter error 'can't invoke "event" command'. 
- view = tv.view_text(self.root, 'Title', 'test text', modal=False) - self.assertIsInstance(view, tv.TextViewer) - view.Ok() - - def test_view_file(self): - test_dir = os.path.dirname(__file__) - testfile = os.path.join(test_dir, 'test_textview.py') - view = tv.view_file(self.root, 'Title', testfile, modal=False) - self.assertIsInstance(view, tv.TextViewer) - self.assertIn('Test', view.textView.get('1.0', '1.end')) - view.Ok() - - # Mock messagebox will be used; view_file will return None. - testfile = os.path.join(test_dir, '../notthere.py') - view = tv.view_file(self.root, 'Title', testfile, modal=False) - self.assertIsNone(view) - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/idlelib/idle_test/test_warning.py b/python/Lib/idlelib/idle_test/test_warning.py deleted file mode 100644 index da1d8a1d0a..0000000000 --- a/python/Lib/idlelib/idle_test/test_warning.py +++ /dev/null @@ -1,73 +0,0 @@ -'''Test warnings replacement in PyShell.py and run.py. - -This file could be expanded to include traceback overrides -(in same two modules). If so, change name. -Revise if output destination changes (http://bugs.python.org/issue18318). -Make sure warnings module is left unaltered (http://bugs.python.org/issue18081). -''' - -import unittest -from test.test_support import captured_stderr - -import warnings -# Try to capture default showwarning before Idle modules are imported. -showwarning = warnings.showwarning -# But if we run this file within idle, we are in the middle of the run.main loop -# and default showwarnings has already been replaced. -running_in_idle = 'idle' in showwarning.__name__ - -from idlelib import run -from idlelib import PyShell as shell - -# The following was generated from PyShell.idle_formatwarning -# and checked as matching expectation. 
-idlemsg = ''' -Warning (from warnings module): - File "test_warning.py", line 99 - Line of code -UserWarning: Test -''' -shellmsg = idlemsg + ">>> " - -class RunWarnTest(unittest.TestCase): - - @unittest.skipIf(running_in_idle, "Does not work when run within Idle.") - def test_showwarnings(self): - self.assertIs(warnings.showwarning, showwarning) - run.capture_warnings(True) - self.assertIs(warnings.showwarning, run.idle_showwarning_subproc) - run.capture_warnings(False) - self.assertIs(warnings.showwarning, showwarning) - - def test_run_show(self): - with captured_stderr() as f: - run.idle_showwarning_subproc( - 'Test', UserWarning, 'test_warning.py', 99, f, 'Line of code') - # The following uses .splitlines to erase line-ending differences - self.assertEqual(idlemsg.splitlines(), f.getvalue().splitlines()) - -class ShellWarnTest(unittest.TestCase): - - @unittest.skipIf(running_in_idle, "Does not work when run within Idle.") - def test_showwarnings(self): - self.assertIs(warnings.showwarning, showwarning) - shell.capture_warnings(True) - self.assertIs(warnings.showwarning, shell.idle_showwarning) - shell.capture_warnings(False) - self.assertIs(warnings.showwarning, showwarning) - - def test_idle_formatter(self): - # Will fail if format changed without regenerating idlemsg - s = shell.idle_formatwarning( - 'Test', UserWarning, 'test_warning.py', 99, 'Line of code') - self.assertEqual(idlemsg, s) - - def test_shell_show(self): - with captured_stderr() as f: - shell.idle_showwarning( - 'Test', UserWarning, 'test_warning.py', 99, f, 'Line of code') - self.assertEqual(shellmsg.splitlines(), f.getvalue().splitlines()) - - -if __name__ == '__main__': - unittest.main(verbosity=2, exit=False) diff --git a/python/Lib/idlelib/idle_test/test_widgetredir.py b/python/Lib/idlelib/idle_test/test_widgetredir.py deleted file mode 100644 index e35ea4174d..0000000000 --- a/python/Lib/idlelib/idle_test/test_widgetredir.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Unittest for 
idlelib.WidgetRedirector - -100% coverage -""" -from test.test_support import requires -import unittest -from idlelib.idle_test.mock_idle import Func -from Tkinter import Tk, Text, TclError -from idlelib.WidgetRedirector import WidgetRedirector - - -class InitCloseTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - cls.root.withdraw() - cls.text = Text(cls.root) - - @classmethod - def tearDownClass(cls): - del cls.text - cls.root.destroy() - del cls.root - - def test_init(self): - redir = WidgetRedirector(self.text) - self.assertEqual(redir.widget, self.text) - self.assertEqual(redir.tk, self.text.tk) - self.assertRaises(TclError, WidgetRedirector, self.text) - redir.close() # restore self.tk, self.text - - def test_close(self): - redir = WidgetRedirector(self.text) - redir.register('insert', Func) - redir.close() - self.assertEqual(redir._operations, {}) - self.assertFalse(hasattr(self.text, 'widget')) - - -class WidgetRedirectorTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - requires('gui') - cls.root = Tk() - cls.root.withdraw() - cls.text = Text(cls.root) - - @classmethod - def tearDownClass(cls): - del cls.text - cls.root.destroy() - del cls.root - - def setUp(self): - self.redir = WidgetRedirector(self.text) - self.func = Func() - self.orig_insert = self.redir.register('insert', self.func) - self.text.insert('insert', 'asdf') # leaves self.text empty - - def tearDown(self): - self.text.delete('1.0', 'end') - self.redir.close() - - def test_repr(self): # partly for 100% coverage - self.assertIn('Redirector', repr(self.redir)) - self.assertIn('Original', repr(self.orig_insert)) - - def test_register(self): - self.assertEqual(self.text.get('1.0', 'end'), '\n') - self.assertEqual(self.func.args, ('insert', 'asdf')) - self.assertIn('insert', self.redir._operations) - self.assertIn('insert', self.text.__dict__) - self.assertEqual(self.text.insert, self.func) - - def test_original_command(self): - 
self.assertEqual(self.orig_insert.operation, 'insert') - self.assertEqual(self.orig_insert.tk_call, self.text.tk.call) - self.orig_insert('insert', 'asdf') - self.assertEqual(self.text.get('1.0', 'end'), 'asdf\n') - - def test_unregister(self): - self.assertIsNone(self.redir.unregister('invalid operation name')) - self.assertEqual(self.redir.unregister('insert'), self.func) - self.assertNotIn('insert', self.redir._operations) - self.assertNotIn('insert', self.text.__dict__) - - def test_unregister_no_attribute(self): - del self.text.insert - self.assertEqual(self.redir.unregister('insert'), self.func) - - def test_dispatch_intercept(self): - self.func.__init__(True) - self.assertTrue(self.redir.dispatch('insert', False)) - self.assertFalse(self.func.args[0]) - - def test_dispatch_bypass(self): - self.orig_insert('insert', 'asdf') - # tk.call returns '' where Python would return None - self.assertEqual(self.redir.dispatch('delete', '1.0', 'end'), '') - self.assertEqual(self.text.get('1.0', 'end'), '\n') - - def test_dispatch_error(self): - self.func.__init__(TclError()) - self.assertEqual(self.redir.dispatch('insert', False), '') - self.assertEqual(self.redir.dispatch('invalid'), '') - - def test_command_dispatch(self): - # Test that .__init__ causes redirection of tk calls - # through redir.dispatch - self.root.call(self.text._w, 'insert', 'hello') - self.assertEqual(self.func.args, ('hello',)) - self.assertEqual(self.text.get('1.0', 'end'), '\n') - # Ensure that called through redir .dispatch and not through - # self.text.insert by having mock raise TclError. - self.func.__init__(TclError()) - self.assertEqual(self.root.call(self.text._w, 'insert', 'boo'), '') - - - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/python/Lib/ihooks.py b/python/Lib/ihooks.py deleted file mode 100755 index 8761dac7cd..0000000000 --- a/python/Lib/ihooks.py +++ /dev/null @@ -1,554 +0,0 @@ -"""Import hook support. 
- -Consistent use of this module will make it possible to change the -different mechanisms involved in loading modules independently. - -While the built-in module imp exports interfaces to the built-in -module searching and loading algorithm, and it is possible to replace -the built-in function __import__ in order to change the semantics of -the import statement, until now it has been difficult to combine the -effect of different __import__ hacks, like loading modules from URLs -by rimport.py, or restricted execution by rexec.py. - -This module defines three new concepts: - -1) A "file system hooks" class provides an interface to a filesystem. - -One hooks class is defined (Hooks), which uses the interface provided -by standard modules os and os.path. It should be used as the base -class for other hooks classes. - -2) A "module loader" class provides an interface to search for a -module in a search path and to load it. It defines a method which -searches for a module in a single directory; by overriding this method -one can redefine the details of the search. If the directory is None, -built-in and frozen modules are searched instead. - -Two module loader class are defined, both implementing the search -strategy used by the built-in __import__ function: ModuleLoader uses -the imp module's find_module interface, while HookableModuleLoader -uses a file system hooks class to interact with the file system. Both -use the imp module's load_* interfaces to actually load the module. - -3) A "module importer" class provides an interface to import a -module, as well as interfaces to reload and unload a module. It also -provides interfaces to install and uninstall itself instead of the -default __import__ and reload (and unload) functions. - -One module importer class is defined (ModuleImporter), which uses a -module loader instance passed in (by default HookableModuleLoader is -instantiated). 
- -The classes defined here should be used as base classes for extended -functionality along those lines. - -If a module importer class supports dotted names, its import_module() -must return a different value depending on whether it is called on -behalf of a "from ... import ..." statement or not. (This is caused -by the way the __import__ hook is used by the Python interpreter.) It -would also do wise to install a different version of reload(). - -""" -from warnings import warnpy3k, warn -warnpy3k("the ihooks module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -import __builtin__ -import imp -import os -import sys - -__all__ = ["BasicModuleLoader","Hooks","ModuleLoader","FancyModuleLoader", - "BasicModuleImporter","ModuleImporter","install","uninstall"] - -VERBOSE = 0 - - -from imp import C_EXTENSION, PY_SOURCE, PY_COMPILED -from imp import C_BUILTIN, PY_FROZEN, PKG_DIRECTORY -BUILTIN_MODULE = C_BUILTIN -FROZEN_MODULE = PY_FROZEN - - -class _Verbose: - - def __init__(self, verbose = VERBOSE): - self.verbose = verbose - - def get_verbose(self): - return self.verbose - - def set_verbose(self, verbose): - self.verbose = verbose - - # XXX The following is an experimental interface - - def note(self, *args): - if self.verbose: - self.message(*args) - - def message(self, format, *args): - if args: - print format%args - else: - print format - - -class BasicModuleLoader(_Verbose): - - """Basic module loader. - - This provides the same functionality as built-in import. It - doesn't deal with checking sys.modules -- all it provides is - find_module() and a load_module(), as well as find_module_in_dir() - which searches just one directory, and can be overridden by a - derived class to change the module search algorithm when the basic - dependency on sys.path is unchanged. - - The interface is a little more convenient than imp's: - find_module(name, [path]) returns None or 'stuff', and - load_module(name, stuff) loads the module. 
- - """ - - def find_module(self, name, path = None): - if path is None: - path = [None] + self.default_path() - for dir in path: - stuff = self.find_module_in_dir(name, dir) - if stuff: return stuff - return None - - def default_path(self): - return sys.path - - def find_module_in_dir(self, name, dir): - if dir is None: - return self.find_builtin_module(name) - else: - try: - return imp.find_module(name, [dir]) - except ImportError: - return None - - def find_builtin_module(self, name): - # XXX frozen packages? - if imp.is_builtin(name): - return None, '', ('', '', BUILTIN_MODULE) - if imp.is_frozen(name): - return None, '', ('', '', FROZEN_MODULE) - return None - - def load_module(self, name, stuff): - file, filename, info = stuff - try: - return imp.load_module(name, file, filename, info) - finally: - if file: file.close() - - -class Hooks(_Verbose): - - """Hooks into the filesystem and interpreter. - - By deriving a subclass you can redefine your filesystem interface, - e.g. to merge it with the URL space. - - This base class behaves just like the native filesystem. 
- - """ - - # imp interface - def get_suffixes(self): return imp.get_suffixes() - def new_module(self, name): return imp.new_module(name) - def is_builtin(self, name): return imp.is_builtin(name) - def init_builtin(self, name): return imp.init_builtin(name) - def is_frozen(self, name): return imp.is_frozen(name) - def init_frozen(self, name): return imp.init_frozen(name) - def get_frozen_object(self, name): return imp.get_frozen_object(name) - def load_source(self, name, filename, file=None): - return imp.load_source(name, filename, file) - def load_compiled(self, name, filename, file=None): - return imp.load_compiled(name, filename, file) - def load_dynamic(self, name, filename, file=None): - return imp.load_dynamic(name, filename, file) - def load_package(self, name, filename, file=None): - return imp.load_module(name, file, filename, ("", "", PKG_DIRECTORY)) - - def add_module(self, name): - d = self.modules_dict() - if name in d: return d[name] - d[name] = m = self.new_module(name) - return m - - # sys interface - def modules_dict(self): return sys.modules - def default_path(self): return sys.path - - def path_split(self, x): return os.path.split(x) - def path_join(self, x, y): return os.path.join(x, y) - def path_isabs(self, x): return os.path.isabs(x) - # etc. - - def path_exists(self, x): return os.path.exists(x) - def path_isdir(self, x): return os.path.isdir(x) - def path_isfile(self, x): return os.path.isfile(x) - def path_islink(self, x): return os.path.islink(x) - # etc. - - def openfile(self, *x): return open(*x) - openfile_error = IOError - def listdir(self, x): return os.listdir(x) - listdir_error = os.error - # etc. - - -class ModuleLoader(BasicModuleLoader): - - """Default module loader; uses file system hooks. - - By defining suitable hooks, you might be able to load modules from - other sources than the file system, e.g. from compressed or - encrypted files, tar files or (if you're brave!) URLs. 
- - """ - - def __init__(self, hooks = None, verbose = VERBOSE): - BasicModuleLoader.__init__(self, verbose) - self.hooks = hooks or Hooks(verbose) - - def default_path(self): - return self.hooks.default_path() - - def modules_dict(self): - return self.hooks.modules_dict() - - def get_hooks(self): - return self.hooks - - def set_hooks(self, hooks): - self.hooks = hooks - - def find_builtin_module(self, name): - # XXX frozen packages? - if self.hooks.is_builtin(name): - return None, '', ('', '', BUILTIN_MODULE) - if self.hooks.is_frozen(name): - return None, '', ('', '', FROZEN_MODULE) - return None - - def find_module_in_dir(self, name, dir, allow_packages=1): - if dir is None: - return self.find_builtin_module(name) - if allow_packages: - fullname = self.hooks.path_join(dir, name) - if self.hooks.path_isdir(fullname): - stuff = self.find_module_in_dir("__init__", fullname, 0) - if stuff: - file = stuff[0] - if file: file.close() - return None, fullname, ('', '', PKG_DIRECTORY) - for info in self.hooks.get_suffixes(): - suff, mode, type = info - fullname = self.hooks.path_join(dir, name+suff) - try: - fp = self.hooks.openfile(fullname, mode) - return fp, fullname, info - except self.hooks.openfile_error: - pass - return None - - def load_module(self, name, stuff): - file, filename, info = stuff - (suff, mode, type) = info - try: - if type == BUILTIN_MODULE: - return self.hooks.init_builtin(name) - if type == FROZEN_MODULE: - return self.hooks.init_frozen(name) - if type == C_EXTENSION: - m = self.hooks.load_dynamic(name, filename, file) - elif type == PY_SOURCE: - m = self.hooks.load_source(name, filename, file) - elif type == PY_COMPILED: - m = self.hooks.load_compiled(name, filename, file) - elif type == PKG_DIRECTORY: - m = self.hooks.load_package(name, filename, file) - else: - raise ImportError, "Unrecognized module type (%r) for %s" % \ - (type, name) - finally: - if file: file.close() - m.__file__ = filename - return m - - -class 
FancyModuleLoader(ModuleLoader): - - """Fancy module loader -- parses and execs the code itself.""" - - def load_module(self, name, stuff): - file, filename, (suff, mode, type) = stuff - realfilename = filename - path = None - - if type == PKG_DIRECTORY: - initstuff = self.find_module_in_dir("__init__", filename, 0) - if not initstuff: - raise ImportError, "No __init__ module in package %s" % name - initfile, initfilename, initinfo = initstuff - initsuff, initmode, inittype = initinfo - if inittype not in (PY_COMPILED, PY_SOURCE): - if initfile: initfile.close() - raise ImportError, \ - "Bad type (%r) for __init__ module in package %s" % ( - inittype, name) - path = [filename] - file = initfile - realfilename = initfilename - type = inittype - - if type == FROZEN_MODULE: - code = self.hooks.get_frozen_object(name) - elif type == PY_COMPILED: - import marshal - file.seek(8) - code = marshal.load(file) - elif type == PY_SOURCE: - data = file.read() - code = compile(data, realfilename, 'exec') - else: - return ModuleLoader.load_module(self, name, stuff) - - m = self.hooks.add_module(name) - if path: - m.__path__ = path - m.__file__ = filename - try: - exec code in m.__dict__ - except: - d = self.hooks.modules_dict() - if name in d: - del d[name] - raise - return m - - -class BasicModuleImporter(_Verbose): - - """Basic module importer; uses module loader. - - This provides basic import facilities but no package imports. 
- - """ - - def __init__(self, loader = None, verbose = VERBOSE): - _Verbose.__init__(self, verbose) - self.loader = loader or ModuleLoader(None, verbose) - self.modules = self.loader.modules_dict() - - def get_loader(self): - return self.loader - - def set_loader(self, loader): - self.loader = loader - - def get_hooks(self): - return self.loader.get_hooks() - - def set_hooks(self, hooks): - return self.loader.set_hooks(hooks) - - def import_module(self, name, globals={}, locals={}, fromlist=[]): - name = str(name) - if name in self.modules: - return self.modules[name] # Fast path - stuff = self.loader.find_module(name) - if not stuff: - raise ImportError, "No module named %s" % name - return self.loader.load_module(name, stuff) - - def reload(self, module, path = None): - name = str(module.__name__) - stuff = self.loader.find_module(name, path) - if not stuff: - raise ImportError, "Module %s not found for reload" % name - return self.loader.load_module(name, stuff) - - def unload(self, module): - del self.modules[str(module.__name__)] - # XXX Should this try to clear the module's namespace? 
- - def install(self): - self.save_import_module = __builtin__.__import__ - self.save_reload = __builtin__.reload - if not hasattr(__builtin__, 'unload'): - __builtin__.unload = None - self.save_unload = __builtin__.unload - __builtin__.__import__ = self.import_module - __builtin__.reload = self.reload - __builtin__.unload = self.unload - - def uninstall(self): - __builtin__.__import__ = self.save_import_module - __builtin__.reload = self.save_reload - __builtin__.unload = self.save_unload - if not __builtin__.unload: - del __builtin__.unload - - -class ModuleImporter(BasicModuleImporter): - - """A module importer that supports packages.""" - - def import_module(self, name, globals=None, locals=None, fromlist=None, - level=-1): - parent = self.determine_parent(globals, level) - q, tail = self.find_head_package(parent, str(name)) - m = self.load_tail(q, tail) - if not fromlist: - return q - if hasattr(m, "__path__"): - self.ensure_fromlist(m, fromlist) - return m - - def determine_parent(self, globals, level=-1): - if not globals or not level: - return None - pkgname = globals.get('__package__') - if pkgname is not None: - if not pkgname and level > 0: - raise ValueError, 'Attempted relative import in non-package' - else: - # __package__ not set, figure it out and set it - modname = globals.get('__name__') - if modname is None: - return None - if "__path__" in globals: - # __path__ is set so modname is already the package name - pkgname = modname - else: - # normal module, work out package name if any - if '.' 
not in modname: - if level > 0: - raise ValueError, ('Attempted relative import in ' - 'non-package') - globals['__package__'] = None - return None - pkgname = modname.rpartition('.')[0] - globals['__package__'] = pkgname - if level > 0: - dot = len(pkgname) - for x in range(level, 1, -1): - try: - dot = pkgname.rindex('.', 0, dot) - except ValueError: - raise ValueError('attempted relative import beyond ' - 'top-level package') - pkgname = pkgname[:dot] - try: - return sys.modules[pkgname] - except KeyError: - if level < 1: - warn("Parent module '%s' not found while handling " - "absolute import" % pkgname, RuntimeWarning, 1) - return None - else: - raise SystemError, ("Parent module '%s' not loaded, cannot " - "perform relative import" % pkgname) - - def find_head_package(self, parent, name): - if '.' in name: - i = name.find('.') - head = name[:i] - tail = name[i+1:] - else: - head = name - tail = "" - if parent: - qname = "%s.%s" % (parent.__name__, head) - else: - qname = head - q = self.import_it(head, qname, parent) - if q: return q, tail - if parent: - qname = head - parent = None - q = self.import_it(head, qname, parent) - if q: return q, tail - raise ImportError, "No module named '%s'" % qname - - def load_tail(self, q, tail): - m = q - while tail: - i = tail.find('.') - if i < 0: i = len(tail) - head, tail = tail[:i], tail[i+1:] - mname = "%s.%s" % (m.__name__, head) - m = self.import_it(head, mname, m) - if not m: - raise ImportError, "No module named '%s'" % mname - return m - - def ensure_fromlist(self, m, fromlist, recursive=0): - for sub in fromlist: - if sub == "*": - if not recursive: - try: - all = m.__all__ - except AttributeError: - pass - else: - self.ensure_fromlist(m, all, 1) - continue - if sub != "*" and not hasattr(m, sub): - subname = "%s.%s" % (m.__name__, sub) - submod = self.import_it(sub, subname, m) - if not submod: - raise ImportError, "No module named '%s'" % subname - - def import_it(self, partname, fqname, parent, force_load=0): 
- if not partname: - # completely empty module name should only happen in - # 'from . import' or __import__("") - return parent - if not force_load: - try: - return self.modules[fqname] - except KeyError: - pass - try: - path = parent and parent.__path__ - except AttributeError: - return None - partname = str(partname) - stuff = self.loader.find_module(partname, path) - if not stuff: - return None - fqname = str(fqname) - m = self.loader.load_module(fqname, stuff) - if parent: - setattr(parent, partname, m) - return m - - def reload(self, module): - name = str(module.__name__) - if '.' not in name: - return self.import_it(name, name, None, force_load=1) - i = name.rfind('.') - pname = name[:i] - parent = self.modules[pname] - return self.import_it(name[i+1:], name, parent, force_load=1) - - -default_importer = None -current_importer = None - -def install(importer = None): - global current_importer - current_importer = importer or default_importer or ModuleImporter() - current_importer.install() - -def uninstall(): - global current_importer - current_importer.uninstall() diff --git a/python/Lib/imaplib.py b/python/Lib/imaplib.py deleted file mode 100755 index 826eea2524..0000000000 --- a/python/Lib/imaplib.py +++ /dev/null @@ -1,1536 +0,0 @@ -"""IMAP4 client. - -Based on RFC 2060. - -Public class: IMAP4 -Public variable: Debug -Public functions: Internaldate2tuple - Int2AP - ParseFlags - Time2Internaldate -""" - -# Author: Piers Lauder December 1997. -# -# Authentication code contributed by Donn Cave June 1998. -# String method conversion by ESR, February 2001. -# GET/SETACL contributed by Anthony Baxter April 2001. -# IMAP4_SSL contributed by Tino Lange March 2002. -# GET/SETQUOTA contributed by Andreas Zeidler June 2002. -# PROXYAUTH contributed by Rick Holbert November 2002. -# GET/SETANNOTATION contributed by Tomas Lindroos June 2005. 
- -__version__ = "2.58" - -import binascii, errno, random, re, socket, subprocess, sys, time - -__all__ = ["IMAP4", "IMAP4_stream", "Internaldate2tuple", - "Int2AP", "ParseFlags", "Time2Internaldate"] - -# Globals - -CRLF = '\r\n' -Debug = 0 -IMAP4_PORT = 143 -IMAP4_SSL_PORT = 993 -AllowedVersions = ('IMAP4REV1', 'IMAP4') # Most recent first - -# Maximal line length when calling readline(). This is to prevent -# reading arbitrary length lines. RFC 3501 and 2060 (IMAP 4rev1) -# don't specify a line length. RFC 2683 suggests limiting client -# command lines to 1000 octets and that servers should be prepared -# to accept command lines up to 8000 octets, so we used to use 10K here. -# In the modern world (eg: gmail) the response to, for example, a -# search command can be quite large, so we now use 1M. -_MAXLINE = 1000000 - - -# Commands - -Commands = { - # name valid states - 'APPEND': ('AUTH', 'SELECTED'), - 'AUTHENTICATE': ('NONAUTH',), - 'CAPABILITY': ('NONAUTH', 'AUTH', 'SELECTED', 'LOGOUT'), - 'CHECK': ('SELECTED',), - 'CLOSE': ('SELECTED',), - 'COPY': ('SELECTED',), - 'CREATE': ('AUTH', 'SELECTED'), - 'DELETE': ('AUTH', 'SELECTED'), - 'DELETEACL': ('AUTH', 'SELECTED'), - 'EXAMINE': ('AUTH', 'SELECTED'), - 'EXPUNGE': ('SELECTED',), - 'FETCH': ('SELECTED',), - 'GETACL': ('AUTH', 'SELECTED'), - 'GETANNOTATION':('AUTH', 'SELECTED'), - 'GETQUOTA': ('AUTH', 'SELECTED'), - 'GETQUOTAROOT': ('AUTH', 'SELECTED'), - 'MYRIGHTS': ('AUTH', 'SELECTED'), - 'LIST': ('AUTH', 'SELECTED'), - 'LOGIN': ('NONAUTH',), - 'LOGOUT': ('NONAUTH', 'AUTH', 'SELECTED', 'LOGOUT'), - 'LSUB': ('AUTH', 'SELECTED'), - 'NAMESPACE': ('AUTH', 'SELECTED'), - 'NOOP': ('NONAUTH', 'AUTH', 'SELECTED', 'LOGOUT'), - 'PARTIAL': ('SELECTED',), # NB: obsolete - 'PROXYAUTH': ('AUTH',), - 'RENAME': ('AUTH', 'SELECTED'), - 'SEARCH': ('SELECTED',), - 'SELECT': ('AUTH', 'SELECTED'), - 'SETACL': ('AUTH', 'SELECTED'), - 'SETANNOTATION':('AUTH', 'SELECTED'), - 'SETQUOTA': ('AUTH', 'SELECTED'), - 'SORT': ('SELECTED',), 
- 'STATUS': ('AUTH', 'SELECTED'), - 'STORE': ('SELECTED',), - 'SUBSCRIBE': ('AUTH', 'SELECTED'), - 'THREAD': ('SELECTED',), - 'UID': ('SELECTED',), - 'UNSUBSCRIBE': ('AUTH', 'SELECTED'), - } - -# Patterns to match server responses - -Continuation = re.compile(r'\+( (?P.*))?') -Flags = re.compile(r'.*FLAGS \((?P[^\)]*)\)') -InternalDate = re.compile(r'.*INTERNALDATE "' - r'(?P[ 0123][0-9])-(?P[A-Z][a-z][a-z])-(?P[0-9][0-9][0-9][0-9])' - r' (?P[0-9][0-9]):(?P[0-9][0-9]):(?P[0-9][0-9])' - r' (?P[-+])(?P[0-9][0-9])(?P[0-9][0-9])' - r'"') -Literal = re.compile(r'.*{(?P\d+)}$') -MapCRLF = re.compile(r'\r\n|\r|\n') -Response_code = re.compile(r'\[(?P[A-Z-]+)( (?P[^\]]*))?\]') -Untagged_response = re.compile(r'\* (?P[A-Z-]+)( (?P.*))?') -Untagged_status = re.compile(r'\* (?P\d+) (?P[A-Z-]+)( (?P.*))?') - - - -class IMAP4: - - """IMAP4 client class. - - Instantiate with: IMAP4([host[, port]]) - - host - host's name (default: localhost); - port - port number (default: standard IMAP4 port). - - All IMAP4rev1 commands are supported by methods of the same - name (in lower-case). - - All arguments to commands are converted to strings, except for - AUTHENTICATE, and the last argument to APPEND which is passed as - an IMAP4 literal. If necessary (the string contains any - non-printing characters or white-space and isn't enclosed with - either parentheses or double quotes) each string is quoted. - However, the 'password' argument to the LOGIN command is always - quoted. If you want to avoid having an argument string quoted - (eg: the 'flags' argument to STORE) then enclose the string in - parentheses (eg: "(\Deleted)"). - - Each command returns a tuple: (type, [data, ...]) where 'type' - is usually 'OK' or 'NO', and 'data' is either the text from the - tagged response, or untagged results from command. Each 'data' - is either a string, or a tuple. If a tuple, then the first part - is the header of the response, and the second part contains - the data (ie: 'literal' value). 
- - Errors raise the exception class .error(""). - IMAP4 server errors raise .abort(""), - which is a sub-class of 'error'. Mailbox status changes - from READ-WRITE to READ-ONLY raise the exception class - .readonly(""), which is a sub-class of 'abort'. - - "error" exceptions imply a program error. - "abort" exceptions imply the connection should be reset, and - the command re-tried. - "readonly" exceptions imply the command should be re-tried. - - Note: to use this module, you must read the RFCs pertaining to the - IMAP4 protocol, as the semantics of the arguments to each IMAP4 - command are left to the invoker, not to mention the results. Also, - most IMAP servers implement a sub-set of the commands available here. - """ - - class error(Exception): pass # Logical errors - debug required - class abort(error): pass # Service errors - close and retry - class readonly(abort): pass # Mailbox status changed to READ-ONLY - - mustquote = re.compile(r"[^\w!#$%&'*+,.:;<=>?^`|~-]") - - def __init__(self, host = '', port = IMAP4_PORT): - self.debug = Debug - self.state = 'LOGOUT' - self.literal = None # A literal argument to a command - self.tagged_commands = {} # Tagged commands awaiting response - self.untagged_responses = {} # {typ: [data, ...], ...} - self.continuation_response = '' # Last continuation response - self.is_readonly = False # READ-ONLY desired state - self.tagnum = 0 - - # Open socket to server. - - self.open(host, port) - - # Create unique tag for this session, - # and compile tagged response matcher. - - self.tagpre = Int2AP(random.randint(4096, 65535)) - self.tagre = re.compile(r'(?P' - + self.tagpre - + r'\d+) (?P[A-Z]+) (?P.*)') - - # Get server welcome message, - # request and store CAPABILITY response. 
- - if __debug__: - self._cmd_log_len = 10 - self._cmd_log_idx = 0 - self._cmd_log = {} # Last `_cmd_log_len' interactions - if self.debug >= 1: - self._mesg('imaplib version %s' % __version__) - self._mesg('new IMAP4 connection, tag=%s' % self.tagpre) - - self.welcome = self._get_response() - if 'PREAUTH' in self.untagged_responses: - self.state = 'AUTH' - elif 'OK' in self.untagged_responses: - self.state = 'NONAUTH' - else: - raise self.error(self.welcome) - - typ, dat = self.capability() - if dat == [None]: - raise self.error('no CAPABILITY response from server') - self.capabilities = tuple(dat[-1].upper().split()) - - if __debug__: - if self.debug >= 3: - self._mesg('CAPABILITIES: %r' % (self.capabilities,)) - - for version in AllowedVersions: - if not version in self.capabilities: - continue - self.PROTOCOL_VERSION = version - return - - raise self.error('server not IMAP4 compliant') - - - def __getattr__(self, attr): - # Allow UPPERCASE variants of IMAP4 command methods. - if attr in Commands: - return getattr(self, attr.lower()) - raise AttributeError("Unknown IMAP4 command: '%s'" % attr) - - - - # Overridable methods - - - def open(self, host = '', port = IMAP4_PORT): - """Setup connection to remote server on "host:port" - (default: localhost:standard IMAP4 port). - This connection will be used by the routines: - read, readline, send, shutdown. 
- """ - self.host = host - self.port = port - self.sock = socket.create_connection((host, port)) - self.file = self.sock.makefile('rb') - - - def read(self, size): - """Read 'size' bytes from remote.""" - return self.file.read(size) - - - def readline(self): - """Read line from remote.""" - line = self.file.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise self.error("got more than %d bytes" % _MAXLINE) - return line - - - def send(self, data): - """Send data to remote.""" - self.sock.sendall(data) - - - def shutdown(self): - """Close I/O established in "open".""" - self.file.close() - try: - self.sock.shutdown(socket.SHUT_RDWR) - except socket.error as e: - # The server might already have closed the connection - if e.errno != errno.ENOTCONN: - raise - finally: - self.sock.close() - - - def socket(self): - """Return socket instance used to connect to IMAP4 server. - - socket = .socket() - """ - return self.sock - - - - # Utility methods - - - def recent(self): - """Return most recent 'RECENT' responses if any exist, - else prompt server for an update using the 'NOOP' command. - - (typ, [data]) = .recent() - - 'data' is None if no new messages, - else list of RECENT responses, most recent last. - """ - name = 'RECENT' - typ, dat = self._untagged_response('OK', [None], name) - if dat[-1]: - return typ, dat - typ, dat = self.noop() # Prod server for response - return self._untagged_response(typ, dat, name) - - - def response(self, code): - """Return data for response 'code' if received, or None. - - Old value for response 'code' is cleared. - - (code, [data]) = .response(code) - """ - return self._untagged_response(code, [None], code.upper()) - - - - # IMAP4 commands - - - def append(self, mailbox, flags, date_time, message): - """Append message to named mailbox. - - (typ, [data]) = .append(mailbox, flags, date_time, message) - - All args except `message' can be None. 
- """ - name = 'APPEND' - if not mailbox: - mailbox = 'INBOX' - if flags: - if (flags[0],flags[-1]) != ('(',')'): - flags = '(%s)' % flags - else: - flags = None - if date_time: - date_time = Time2Internaldate(date_time) - else: - date_time = None - self.literal = MapCRLF.sub(CRLF, message) - return self._simple_command(name, mailbox, flags, date_time) - - - def authenticate(self, mechanism, authobject): - """Authenticate command - requires response processing. - - 'mechanism' specifies which authentication mechanism is to - be used - it must appear in .capabilities in the - form AUTH=. - - 'authobject' must be a callable object: - - data = authobject(response) - - It will be called to process server continuation responses. - It should return data that will be encoded and sent to server. - It should return None if the client abort response '*' should - be sent instead. - """ - mech = mechanism.upper() - # XXX: shouldn't this code be removed, not commented out? - #cap = 'AUTH=%s' % mech - #if not cap in self.capabilities: # Let the server decide! - # raise self.error("Server doesn't allow %s authentication." % mech) - self.literal = _Authenticator(authobject).process - typ, dat = self._simple_command('AUTHENTICATE', mech) - if typ != 'OK': - raise self.error(dat[-1]) - self.state = 'AUTH' - return typ, dat - - - def capability(self): - """(typ, [data]) = .capability() - Fetch capabilities list from server.""" - - name = 'CAPABILITY' - typ, dat = self._simple_command(name) - return self._untagged_response(typ, dat, name) - - - def check(self): - """Checkpoint mailbox on server. - - (typ, [data]) = .check() - """ - return self._simple_command('CHECK') - - - def close(self): - """Close currently selected mailbox. - - Deleted messages are removed from writable mailbox. - This is the recommended command before 'LOGOUT'. 
- - (typ, [data]) = .close() - """ - try: - typ, dat = self._simple_command('CLOSE') - finally: - self.state = 'AUTH' - return typ, dat - - - def copy(self, message_set, new_mailbox): - """Copy 'message_set' messages onto end of 'new_mailbox'. - - (typ, [data]) = .copy(message_set, new_mailbox) - """ - return self._simple_command('COPY', message_set, new_mailbox) - - - def create(self, mailbox): - """Create new mailbox. - - (typ, [data]) = .create(mailbox) - """ - return self._simple_command('CREATE', mailbox) - - - def delete(self, mailbox): - """Delete old mailbox. - - (typ, [data]) = .delete(mailbox) - """ - return self._simple_command('DELETE', mailbox) - - def deleteacl(self, mailbox, who): - """Delete the ACLs (remove any rights) set for who on mailbox. - - (typ, [data]) = .deleteacl(mailbox, who) - """ - return self._simple_command('DELETEACL', mailbox, who) - - def expunge(self): - """Permanently remove deleted items from selected mailbox. - - Generates 'EXPUNGE' response for each deleted message. - - (typ, [data]) = .expunge() - - 'data' is list of 'EXPUNGE'd message numbers in order received. - """ - name = 'EXPUNGE' - typ, dat = self._simple_command(name) - return self._untagged_response(typ, dat, name) - - - def fetch(self, message_set, message_parts): - """Fetch (parts of) messages. - - (typ, [data, ...]) = .fetch(message_set, message_parts) - - 'message_parts' should be a string of selected parts - enclosed in parentheses, eg: "(UID BODY[TEXT])". - - 'data' are tuples of message part envelope and data. - """ - name = 'FETCH' - typ, dat = self._simple_command(name, message_set, message_parts) - return self._untagged_response(typ, dat, name) - - - def getacl(self, mailbox): - """Get the ACLs for a mailbox. 
- - (typ, [data]) = .getacl(mailbox) - """ - typ, dat = self._simple_command('GETACL', mailbox) - return self._untagged_response(typ, dat, 'ACL') - - - def getannotation(self, mailbox, entry, attribute): - """(typ, [data]) = .getannotation(mailbox, entry, attribute) - Retrieve ANNOTATIONs.""" - - typ, dat = self._simple_command('GETANNOTATION', mailbox, entry, attribute) - return self._untagged_response(typ, dat, 'ANNOTATION') - - - def getquota(self, root): - """Get the quota root's resource usage and limits. - - Part of the IMAP4 QUOTA extension defined in rfc2087. - - (typ, [data]) = .getquota(root) - """ - typ, dat = self._simple_command('GETQUOTA', root) - return self._untagged_response(typ, dat, 'QUOTA') - - - def getquotaroot(self, mailbox): - """Get the list of quota roots for the named mailbox. - - (typ, [[QUOTAROOT responses...], [QUOTA responses]]) = .getquotaroot(mailbox) - """ - typ, dat = self._simple_command('GETQUOTAROOT', mailbox) - typ, quota = self._untagged_response(typ, dat, 'QUOTA') - typ, quotaroot = self._untagged_response(typ, dat, 'QUOTAROOT') - return typ, [quotaroot, quota] - - - def list(self, directory='""', pattern='*'): - """List mailbox names in directory matching pattern. - - (typ, [data]) = .list(directory='""', pattern='*') - - 'data' is list of LIST responses. - """ - name = 'LIST' - typ, dat = self._simple_command(name, directory, pattern) - return self._untagged_response(typ, dat, name) - - - def login(self, user, password): - """Identify client using plaintext password. - - (typ, [data]) = .login(user, password) - - NB: 'password' will be quoted. - """ - typ, dat = self._simple_command('LOGIN', user, self._quote(password)) - if typ != 'OK': - raise self.error(dat[-1]) - self.state = 'AUTH' - return typ, dat - - - def login_cram_md5(self, user, password): - """ Force use of CRAM-MD5 authentication. 
- - (typ, [data]) = .login_cram_md5(user, password) - """ - self.user, self.password = user, password - return self.authenticate('CRAM-MD5', self._CRAM_MD5_AUTH) - - - def _CRAM_MD5_AUTH(self, challenge): - """ Authobject to use with CRAM-MD5 authentication. """ - import hmac - return self.user + " " + hmac.HMAC(self.password, challenge).hexdigest() - - - def logout(self): - """Shutdown connection to server. - - (typ, [data]) = .logout() - - Returns server 'BYE' response. - """ - self.state = 'LOGOUT' - try: typ, dat = self._simple_command('LOGOUT') - except: typ, dat = 'NO', ['%s: %s' % sys.exc_info()[:2]] - self.shutdown() - if 'BYE' in self.untagged_responses: - return 'BYE', self.untagged_responses['BYE'] - return typ, dat - - - def lsub(self, directory='""', pattern='*'): - """List 'subscribed' mailbox names in directory matching pattern. - - (typ, [data, ...]) = .lsub(directory='""', pattern='*') - - 'data' are tuples of message part envelope and data. - """ - name = 'LSUB' - typ, dat = self._simple_command(name, directory, pattern) - return self._untagged_response(typ, dat, name) - - def myrights(self, mailbox): - """Show my ACLs for a mailbox (i.e. the rights that I have on mailbox). - - (typ, [data]) = .myrights(mailbox) - """ - typ,dat = self._simple_command('MYRIGHTS', mailbox) - return self._untagged_response(typ, dat, 'MYRIGHTS') - - def namespace(self): - """ Returns IMAP namespaces ala rfc2342 - - (typ, [data, ...]) = .namespace() - """ - name = 'NAMESPACE' - typ, dat = self._simple_command(name) - return self._untagged_response(typ, dat, name) - - - def noop(self): - """Send NOOP command. - - (typ, [data]) = .noop() - """ - if __debug__: - if self.debug >= 3: - self._dump_ur(self.untagged_responses) - return self._simple_command('NOOP') - - - def partial(self, message_num, message_part, start, length): - """Fetch truncated part of a message. 
- - (typ, [data, ...]) = .partial(message_num, message_part, start, length) - - 'data' is tuple of message part envelope and data. - """ - name = 'PARTIAL' - typ, dat = self._simple_command(name, message_num, message_part, start, length) - return self._untagged_response(typ, dat, 'FETCH') - - - def proxyauth(self, user): - """Assume authentication as "user". - - Allows an authorised administrator to proxy into any user's - mailbox. - - (typ, [data]) = .proxyauth(user) - """ - - name = 'PROXYAUTH' - return self._simple_command('PROXYAUTH', user) - - - def rename(self, oldmailbox, newmailbox): - """Rename old mailbox name to new. - - (typ, [data]) = .rename(oldmailbox, newmailbox) - """ - return self._simple_command('RENAME', oldmailbox, newmailbox) - - - def search(self, charset, *criteria): - """Search mailbox for matching messages. - - (typ, [data]) = .search(charset, criterion, ...) - - 'data' is space separated list of matching message numbers. - """ - name = 'SEARCH' - if charset: - typ, dat = self._simple_command(name, 'CHARSET', charset, *criteria) - else: - typ, dat = self._simple_command(name, *criteria) - return self._untagged_response(typ, dat, name) - - - def select(self, mailbox='INBOX', readonly=False): - """Select a mailbox. - - Flush all untagged responses. - - (typ, [data]) = .select(mailbox='INBOX', readonly=False) - - 'data' is count of messages in mailbox ('EXISTS' response). - - Mandated responses are ('FLAGS', 'EXISTS', 'RECENT', 'UIDVALIDITY'), so - other responses should be obtained via .response('FLAGS') etc. - """ - self.untagged_responses = {} # Flush old responses. 
- self.is_readonly = readonly - if readonly: - name = 'EXAMINE' - else: - name = 'SELECT' - typ, dat = self._simple_command(name, mailbox) - if typ != 'OK': - self.state = 'AUTH' # Might have been 'SELECTED' - return typ, dat - self.state = 'SELECTED' - if 'READ-ONLY' in self.untagged_responses \ - and not readonly: - if __debug__: - if self.debug >= 1: - self._dump_ur(self.untagged_responses) - raise self.readonly('%s is not writable' % mailbox) - return typ, self.untagged_responses.get('EXISTS', [None]) - - - def setacl(self, mailbox, who, what): - """Set a mailbox acl. - - (typ, [data]) = .setacl(mailbox, who, what) - """ - return self._simple_command('SETACL', mailbox, who, what) - - - def setannotation(self, *args): - """(typ, [data]) = .setannotation(mailbox[, entry, attribute]+) - Set ANNOTATIONs.""" - - typ, dat = self._simple_command('SETANNOTATION', *args) - return self._untagged_response(typ, dat, 'ANNOTATION') - - - def setquota(self, root, limits): - """Set the quota root's resource limits. - - (typ, [data]) = .setquota(root, limits) - """ - typ, dat = self._simple_command('SETQUOTA', root, limits) - return self._untagged_response(typ, dat, 'QUOTA') - - - def sort(self, sort_criteria, charset, *search_criteria): - """IMAP4rev1 extension SORT command. - - (typ, [data]) = .sort(sort_criteria, charset, search_criteria, ...) - """ - name = 'SORT' - #if not name in self.capabilities: # Let the server decide! - # raise self.error('unimplemented extension command: %s' % name) - if (sort_criteria[0],sort_criteria[-1]) != ('(',')'): - sort_criteria = '(%s)' % sort_criteria - typ, dat = self._simple_command(name, sort_criteria, charset, *search_criteria) - return self._untagged_response(typ, dat, name) - - - def status(self, mailbox, names): - """Request named status conditions for mailbox. - - (typ, [data]) = .status(mailbox, names) - """ - name = 'STATUS' - #if self.PROTOCOL_VERSION == 'IMAP4': # Let the server decide! 
- # raise self.error('%s unimplemented in IMAP4 (obtain IMAP4rev1 server, or re-code)' % name) - typ, dat = self._simple_command(name, mailbox, names) - return self._untagged_response(typ, dat, name) - - - def store(self, message_set, command, flags): - """Alters flag dispositions for messages in mailbox. - - (typ, [data]) = .store(message_set, command, flags) - """ - if (flags[0],flags[-1]) != ('(',')'): - flags = '(%s)' % flags # Avoid quoting the flags - typ, dat = self._simple_command('STORE', message_set, command, flags) - return self._untagged_response(typ, dat, 'FETCH') - - - def subscribe(self, mailbox): - """Subscribe to new mailbox. - - (typ, [data]) = .subscribe(mailbox) - """ - return self._simple_command('SUBSCRIBE', mailbox) - - - def thread(self, threading_algorithm, charset, *search_criteria): - """IMAPrev1 extension THREAD command. - - (type, [data]) = .thread(threading_algorithm, charset, search_criteria, ...) - """ - name = 'THREAD' - typ, dat = self._simple_command(name, threading_algorithm, charset, *search_criteria) - return self._untagged_response(typ, dat, name) - - - def uid(self, command, *args): - """Execute "command arg ..." with messages identified by UID, - rather than message number. - - (typ, [data]) = .uid(command, arg1, arg2, ...) - - Returns response appropriate to 'command'. - """ - command = command.upper() - if not command in Commands: - raise self.error("Unknown IMAP4 UID command: %s" % command) - if self.state not in Commands[command]: - raise self.error("command %s illegal in state %s, " - "only allowed in states %s" % - (command, self.state, - ', '.join(Commands[command]))) - name = 'UID' - typ, dat = self._simple_command(name, command, *args) - if command in ('SEARCH', 'SORT', 'THREAD'): - name = command - else: - name = 'FETCH' - return self._untagged_response(typ, dat, name) - - - def unsubscribe(self, mailbox): - """Unsubscribe from old mailbox. 
- - (typ, [data]) = .unsubscribe(mailbox) - """ - return self._simple_command('UNSUBSCRIBE', mailbox) - - - def xatom(self, name, *args): - """Allow simple extension commands - notified by server in CAPABILITY response. - - Assumes command is legal in current state. - - (typ, [data]) = .xatom(name, arg, ...) - - Returns response appropriate to extension command `name'. - """ - name = name.upper() - #if not name in self.capabilities: # Let the server decide! - # raise self.error('unknown extension command: %s' % name) - if not name in Commands: - Commands[name] = (self.state,) - return self._simple_command(name, *args) - - - - # Private methods - - - def _append_untagged(self, typ, dat): - - if dat is None: dat = '' - ur = self.untagged_responses - if __debug__: - if self.debug >= 5: - self._mesg('untagged_responses[%s] %s += ["%s"]' % - (typ, len(ur.get(typ,'')), dat)) - if typ in ur: - ur[typ].append(dat) - else: - ur[typ] = [dat] - - - def _check_bye(self): - bye = self.untagged_responses.get('BYE') - if bye: - raise self.abort(bye[-1]) - - - def _command(self, name, *args): - - if self.state not in Commands[name]: - self.literal = None - raise self.error("command %s illegal in state %s, " - "only allowed in states %s" % - (name, self.state, - ', '.join(Commands[name]))) - - for typ in ('OK', 'NO', 'BAD'): - if typ in self.untagged_responses: - del self.untagged_responses[typ] - - if 'READ-ONLY' in self.untagged_responses \ - and not self.is_readonly: - raise self.readonly('mailbox status changed to READ-ONLY') - - tag = self._new_tag() - data = '%s %s' % (tag, name) - for arg in args: - if arg is None: continue - data = '%s %s' % (data, self._checkquote(arg)) - - literal = self.literal - if literal is not None: - self.literal = None - if type(literal) is type(self._command): - literator = literal - else: - literator = None - data = '%s {%s}' % (data, len(literal)) - - if __debug__: - if self.debug >= 4: - self._mesg('> %s' % data) - else: - self._log('> %s' % 
data) - - try: - self.send('%s%s' % (data, CRLF)) - except (socket.error, OSError), val: - raise self.abort('socket error: %s' % val) - - if literal is None: - return tag - - while 1: - # Wait for continuation response - - while self._get_response(): - if self.tagged_commands[tag]: # BAD/NO? - return tag - - # Send literal - - if literator: - literal = literator(self.continuation_response) - - if __debug__: - if self.debug >= 4: - self._mesg('write literal size %s' % len(literal)) - - try: - self.send(literal) - self.send(CRLF) - except (socket.error, OSError), val: - raise self.abort('socket error: %s' % val) - - if not literator: - break - - return tag - - - def _command_complete(self, name, tag): - # BYE is expected after LOGOUT - if name != 'LOGOUT': - self._check_bye() - try: - typ, data = self._get_tagged_response(tag) - except self.abort, val: - raise self.abort('command: %s => %s' % (name, val)) - except self.error, val: - raise self.error('command: %s => %s' % (name, val)) - if name != 'LOGOUT': - self._check_bye() - if typ == 'BAD': - raise self.error('%s command error: %s %s' % (name, typ, data)) - return typ, data - - - def _get_response(self): - - # Read response and store. - # - # Returns None for continuation responses, - # otherwise first response line received. - - resp = self._get_line() - - # Command completion response? - - if self._match(self.tagre, resp): - tag = self.mo.group('tag') - if not tag in self.tagged_commands: - raise self.abort('unexpected tagged response: %s' % resp) - - typ = self.mo.group('type') - dat = self.mo.group('data') - self.tagged_commands[tag] = (typ, [dat]) - else: - dat2 = None - - # '*' (untagged) responses? - - if not self._match(Untagged_response, resp): - if self._match(Untagged_status, resp): - dat2 = self.mo.group('data2') - - if self.mo is None: - # Only other possibility is '+' (continuation) response... 
- - if self._match(Continuation, resp): - self.continuation_response = self.mo.group('data') - return None # NB: indicates continuation - - raise self.abort("unexpected response: '%s'" % resp) - - typ = self.mo.group('type') - dat = self.mo.group('data') - if dat is None: dat = '' # Null untagged response - if dat2: dat = dat + ' ' + dat2 - - # Is there a literal to come? - - while self._match(Literal, dat): - - # Read literal direct from connection. - - size = int(self.mo.group('size')) - if __debug__: - if self.debug >= 4: - self._mesg('read literal size %s' % size) - data = self.read(size) - - # Store response with literal as tuple - - self._append_untagged(typ, (dat, data)) - - # Read trailer - possibly containing another literal - - dat = self._get_line() - - self._append_untagged(typ, dat) - - # Bracketed response information? - - if typ in ('OK', 'NO', 'BAD') and self._match(Response_code, dat): - self._append_untagged(self.mo.group('type'), self.mo.group('data')) - - if __debug__: - if self.debug >= 1 and typ in ('NO', 'BAD', 'BYE'): - self._mesg('%s response: %s' % (typ, dat)) - - return resp - - - def _get_tagged_response(self, tag): - - while 1: - result = self.tagged_commands[tag] - if result is not None: - del self.tagged_commands[tag] - return result - - # If we've seen a BYE at this point, the socket will be - # closed, so report the BYE now. - - self._check_bye() - - # Some have reported "unexpected response" exceptions. - # Note that ignoring them here causes loops. - # Instead, send me details of the unexpected response and - # I'll update the code in `_get_response()'. 
- - try: - self._get_response() - except self.abort, val: - if __debug__: - if self.debug >= 1: - self.print_log() - raise - - - def _get_line(self): - - line = self.readline() - if not line: - raise self.abort('socket error: EOF') - - # Protocol mandates all lines terminated by CRLF - if not line.endswith('\r\n'): - raise self.abort('socket error: unterminated line') - - line = line[:-2] - if __debug__: - if self.debug >= 4: - self._mesg('< %s' % line) - else: - self._log('< %s' % line) - return line - - - def _match(self, cre, s): - - # Run compiled regular expression match method on 's'. - # Save result, return success. - - self.mo = cre.match(s) - if __debug__: - if self.mo is not None and self.debug >= 5: - self._mesg("\tmatched r'%s' => %r" % (cre.pattern, self.mo.groups())) - return self.mo is not None - - - def _new_tag(self): - - tag = '%s%s' % (self.tagpre, self.tagnum) - self.tagnum = self.tagnum + 1 - self.tagged_commands[tag] = None - return tag - - - def _checkquote(self, arg): - - # Must quote command args if non-alphanumeric chars present, - # and not already quoted. 
- - if type(arg) is not type(''): - return arg - if len(arg) >= 2 and (arg[0],arg[-1]) in (('(',')'),('"','"')): - return arg - if arg and self.mustquote.search(arg) is None: - return arg - return self._quote(arg) - - - def _quote(self, arg): - - arg = arg.replace('\\', '\\\\') - arg = arg.replace('"', '\\"') - - return '"%s"' % arg - - - def _simple_command(self, name, *args): - - return self._command_complete(name, self._command(name, *args)) - - - def _untagged_response(self, typ, dat, name): - - if typ == 'NO': - return typ, dat - if not name in self.untagged_responses: - return typ, [None] - data = self.untagged_responses.pop(name) - if __debug__: - if self.debug >= 5: - self._mesg('untagged_responses[%s] => %s' % (name, data)) - return typ, data - - - if __debug__: - - def _mesg(self, s, secs=None): - if secs is None: - secs = time.time() - tm = time.strftime('%M:%S', time.localtime(secs)) - sys.stderr.write(' %s.%02d %s\n' % (tm, (secs*100)%100, s)) - sys.stderr.flush() - - def _dump_ur(self, dict): - # Dump untagged responses (in `dict'). - l = dict.items() - if not l: return - t = '\n\t\t' - l = map(lambda x:'%s: "%s"' % (x[0], x[1][0] and '" "'.join(x[1]) or ''), l) - self._mesg('untagged responses dump:%s%s' % (t, t.join(l))) - - def _log(self, line): - # Keep log of last `_cmd_log_len' interactions for debugging. 
- self._cmd_log[self._cmd_log_idx] = (line, time.time()) - self._cmd_log_idx += 1 - if self._cmd_log_idx >= self._cmd_log_len: - self._cmd_log_idx = 0 - - def print_log(self): - self._mesg('last %d IMAP4 interactions:' % len(self._cmd_log)) - i, n = self._cmd_log_idx, self._cmd_log_len - while n: - try: - self._mesg(*self._cmd_log[i]) - except: - pass - i += 1 - if i >= self._cmd_log_len: - i = 0 - n -= 1 - - - -try: - import ssl -except ImportError: - pass -else: - class IMAP4_SSL(IMAP4): - - """IMAP4 client class over SSL connection - - Instantiate with: IMAP4_SSL([host[, port[, keyfile[, certfile]]]]) - - host - host's name (default: localhost); - port - port number (default: standard IMAP4 SSL port). - keyfile - PEM formatted file that contains your private key (default: None); - certfile - PEM formatted certificate chain file (default: None); - - for more documentation see the docstring of the parent class IMAP4. - """ - - - def __init__(self, host = '', port = IMAP4_SSL_PORT, keyfile = None, certfile = None): - self.keyfile = keyfile - self.certfile = certfile - IMAP4.__init__(self, host, port) - - - def open(self, host = '', port = IMAP4_SSL_PORT): - """Setup connection to remote server on "host:port". - (default: localhost:standard IMAP4 SSL port). - This connection will be used by the routines: - read, readline, send, shutdown. 
- """ - self.host = host - self.port = port - self.sock = socket.create_connection((host, port)) - self.sslobj = ssl.wrap_socket(self.sock, self.keyfile, self.certfile) - self.file = self.sslobj.makefile('rb') - - - def read(self, size): - """Read 'size' bytes from remote.""" - return self.file.read(size) - - - def readline(self): - """Read line from remote.""" - return self.file.readline() - - - def send(self, data): - """Send data to remote.""" - bytes = len(data) - while bytes > 0: - sent = self.sslobj.write(data) - if sent == bytes: - break # avoid copy - data = data[sent:] - bytes = bytes - sent - - - def shutdown(self): - """Close I/O established in "open".""" - self.file.close() - self.sock.close() - - - def socket(self): - """Return socket instance used to connect to IMAP4 server. - - socket = .socket() - """ - return self.sock - - - def ssl(self): - """Return SSLObject instance used to communicate with the IMAP4 server. - - ssl = ssl.wrap_socket(.socket) - """ - return self.sslobj - - __all__.append("IMAP4_SSL") - - -class IMAP4_stream(IMAP4): - - """IMAP4 client class over a stream - - Instantiate with: IMAP4_stream(command) - - where "command" is a string that can be passed to subprocess.Popen() - - for more documentation see the docstring of the parent class IMAP4. - """ - - - def __init__(self, command): - self.command = command - IMAP4.__init__(self) - - - def open(self, host = None, port = None): - """Setup a stream connection. - This connection will be used by the routines: - read, readline, send, shutdown. 
- """ - self.host = None # For compatibility with parent class - self.port = None - self.sock = None - self.file = None - self.process = subprocess.Popen(self.command, - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - shell=True, close_fds=True) - self.writefile = self.process.stdin - self.readfile = self.process.stdout - - - def read(self, size): - """Read 'size' bytes from remote.""" - return self.readfile.read(size) - - - def readline(self): - """Read line from remote.""" - return self.readfile.readline() - - - def send(self, data): - """Send data to remote.""" - self.writefile.write(data) - self.writefile.flush() - - - def shutdown(self): - """Close I/O established in "open".""" - self.readfile.close() - self.writefile.close() - self.process.wait() - - - -class _Authenticator: - - """Private class to provide en/decoding - for base64-based authentication conversation. - """ - - def __init__(self, mechinst): - self.mech = mechinst # Callable object to provide/process data - - def process(self, data): - ret = self.mech(self.decode(data)) - if ret is None: - return '*' # Abort conversation - return self.encode(ret) - - def encode(self, inp): - # - # Invoke binascii.b2a_base64 iteratively with - # short even length buffers, strip the trailing - # line feed from the result and append. "Even" - # means a number that factors to both 6 and 8, - # so when it gets to the end of the 8-bit input - # there's no partial 6-bit output. - # - oup = '' - while inp: - if len(inp) > 48: - t = inp[:48] - inp = inp[48:] - else: - t = inp - inp = '' - e = binascii.b2a_base64(t) - if e: - oup = oup + e[:-1] - return oup - - def decode(self, inp): - if not inp: - return '' - return binascii.a2b_base64(inp) - - - -Mon2num = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, - 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12} - -def Internaldate2tuple(resp): - """Parse an IMAP4 INTERNALDATE string. - - Return corresponding local time. 
The return value is a - time.struct_time instance or None if the string has wrong format. - """ - - mo = InternalDate.match(resp) - if not mo: - return None - - mon = Mon2num[mo.group('mon')] - zonen = mo.group('zonen') - - day = int(mo.group('day')) - year = int(mo.group('year')) - hour = int(mo.group('hour')) - min = int(mo.group('min')) - sec = int(mo.group('sec')) - zoneh = int(mo.group('zoneh')) - zonem = int(mo.group('zonem')) - - # INTERNALDATE timezone must be subtracted to get UT - - zone = (zoneh*60 + zonem)*60 - if zonen == '-': - zone = -zone - - tt = (year, mon, day, hour, min, sec, -1, -1, -1) - - utc = time.mktime(tt) - - # Following is necessary because the time module has no 'mkgmtime'. - # 'mktime' assumes arg in local timezone, so adds timezone/altzone. - - lt = time.localtime(utc) - if time.daylight and lt[-1]: - zone = zone + time.altzone - else: - zone = zone + time.timezone - - return time.localtime(utc - zone) - - - -def Int2AP(num): - - """Convert integer to A-P string representation.""" - - val = ''; AP = 'ABCDEFGHIJKLMNOP' - num = int(abs(num)) - while num: - num, mod = divmod(num, 16) - val = AP[mod] + val - return val - - - -def ParseFlags(resp): - - """Convert IMAP4 flags response to python tuple.""" - - mo = Flags.match(resp) - if not mo: - return () - - return tuple(mo.group('flags').split()) - - -def Time2Internaldate(date_time): - - """Convert date_time to IMAP4 INTERNALDATE representation. - - Return string in form: '"DD-Mmm-YYYY HH:MM:SS +HHMM"'. The - date_time argument can be a number (int or float) representing - seconds since epoch (as returned by time.time()), a 9-tuple - representing local time (as returned by time.localtime()), or a - double-quoted string. In the last case, it is assumed to already - be in the correct format. 
- """ - - if isinstance(date_time, (int, float)): - tt = time.localtime(date_time) - elif isinstance(date_time, (tuple, time.struct_time)): - tt = date_time - elif isinstance(date_time, str) and (date_time[0],date_time[-1]) == ('"','"'): - return date_time # Assume in correct format - else: - raise ValueError("date_time not of a known type") - - dt = time.strftime("%d-%b-%Y %H:%M:%S", tt) - if dt[0] == '0': - dt = ' ' + dt[1:] - if time.daylight and tt[-1]: - zone = -time.altzone - else: - zone = -time.timezone - return '"' + dt + " %+03d%02d" % divmod(zone//60, 60) + '"' - - - -if __name__ == '__main__': - - # To test: invoke either as 'python imaplib.py [IMAP4_server_hostname]' - # or 'python imaplib.py -s "rsh IMAP4_server_hostname exec /etc/rimapd"' - # to test the IMAP4_stream class - - import getopt, getpass - - try: - optlist, args = getopt.getopt(sys.argv[1:], 'd:s:') - except getopt.error, val: - optlist, args = (), () - - stream_command = None - for opt,val in optlist: - if opt == '-d': - Debug = int(val) - elif opt == '-s': - stream_command = val - if not args: args = (stream_command,) - - if not args: args = ('',) - - host = args[0] - - USER = getpass.getuser() - PASSWD = getpass.getpass("IMAP password for %s on %s: " % (USER, host or "localhost")) - - test_mesg = 'From: %(user)s@localhost%(lf)sSubject: IMAP4 test%(lf)s%(lf)sdata...%(lf)s' % {'user':USER, 'lf':'\n'} - test_seq1 = ( - ('login', (USER, PASSWD)), - ('create', ('/tmp/xxx 1',)), - ('rename', ('/tmp/xxx 1', '/tmp/yyy')), - ('CREATE', ('/tmp/yyz 2',)), - ('append', ('/tmp/yyz 2', None, None, test_mesg)), - ('list', ('/tmp', 'yy*')), - ('select', ('/tmp/yyz 2',)), - ('search', (None, 'SUBJECT', 'test')), - ('fetch', ('1', '(FLAGS INTERNALDATE RFC822)')), - ('store', ('1', 'FLAGS', '(\Deleted)')), - ('namespace', ()), - ('expunge', ()), - ('recent', ()), - ('close', ()), - ) - - test_seq2 = ( - ('select', ()), - ('response',('UIDVALIDITY',)), - ('uid', ('SEARCH', 'ALL')), - ('response', 
('EXISTS',)), - ('append', (None, None, None, test_mesg)), - ('recent', ()), - ('logout', ()), - ) - - def run(cmd, args): - M._mesg('%s %s' % (cmd, args)) - typ, dat = getattr(M, cmd)(*args) - M._mesg('%s => %s %s' % (cmd, typ, dat)) - if typ == 'NO': raise dat[0] - return dat - - try: - if stream_command: - M = IMAP4_stream(stream_command) - else: - M = IMAP4(host) - if M.state == 'AUTH': - test_seq1 = test_seq1[1:] # Login not needed - M._mesg('PROTOCOL_VERSION = %s' % M.PROTOCOL_VERSION) - M._mesg('CAPABILITIES = %r' % (M.capabilities,)) - - for cmd,args in test_seq1: - run(cmd, args) - - for ml in run('list', ('/tmp/', 'yy%')): - mo = re.match(r'.*"([^"]+)"$', ml) - if mo: path = mo.group(1) - else: path = ml.split()[-1] - run('delete', (path,)) - - for cmd,args in test_seq2: - dat = run(cmd, args) - - if (cmd,args) != ('uid', ('SEARCH', 'ALL')): - continue - - uid = dat[-1].split() - if not uid: continue - run('uid', ('FETCH', '%s' % uid[-1], - '(FLAGS INTERNALDATE RFC822.SIZE RFC822.HEADER RFC822.TEXT)')) - - print '\nAll tests OK.' - - except: - print '\nTests failed.' 
- - if not Debug: - print ''' -If you would like to see debugging output, -try: %s -d5 -''' % sys.argv[0] - - raise diff --git a/python/Lib/imghdr.py b/python/Lib/imghdr.py deleted file mode 100755 index fc864c3a2e..0000000000 --- a/python/Lib/imghdr.py +++ /dev/null @@ -1,159 +0,0 @@ -"""Recognize image file formats based on their first few bytes.""" - -__all__ = ["what"] - -#-------------------------# -# Recognize image headers # -#-------------------------# - -def what(file, h=None): - f = None - try: - if h is None: - if isinstance(file, basestring): - f = open(file, 'rb') - h = f.read(32) - else: - location = file.tell() - h = file.read(32) - file.seek(location) - for tf in tests: - res = tf(h, f) - if res: - return res - finally: - if f: f.close() - return None - - -#---------------------------------# -# Subroutines per image file type # -#---------------------------------# - -tests = [] - -def test_jpeg(h, f): - """JPEG data in JFIF format""" - if h[6:10] == 'JFIF': - return 'jpeg' - -tests.append(test_jpeg) - -def test_exif(h, f): - """JPEG data in Exif format""" - if h[6:10] == 'Exif': - return 'jpeg' - -tests.append(test_exif) - -def test_png(h, f): - if h[:8] == "\211PNG\r\n\032\n": - return 'png' - -tests.append(test_png) - -def test_gif(h, f): - """GIF ('87 and '89 variants)""" - if h[:6] in ('GIF87a', 'GIF89a'): - return 'gif' - -tests.append(test_gif) - -def test_tiff(h, f): - """TIFF (can be in Motorola or Intel byte order)""" - if h[:2] in ('MM', 'II'): - return 'tiff' - -tests.append(test_tiff) - -def test_rgb(h, f): - """SGI image library""" - if h[:2] == '\001\332': - return 'rgb' - -tests.append(test_rgb) - -def test_pbm(h, f): - """PBM (portable bitmap)""" - if len(h) >= 3 and \ - h[0] == 'P' and h[1] in '14' and h[2] in ' \t\n\r': - return 'pbm' - -tests.append(test_pbm) - -def test_pgm(h, f): - """PGM (portable graymap)""" - if len(h) >= 3 and \ - h[0] == 'P' and h[1] in '25' and h[2] in ' \t\n\r': - return 'pgm' - -tests.append(test_pgm) - 
-def test_ppm(h, f): - """PPM (portable pixmap)""" - if len(h) >= 3 and \ - h[0] == 'P' and h[1] in '36' and h[2] in ' \t\n\r': - return 'ppm' - -tests.append(test_ppm) - -def test_rast(h, f): - """Sun raster file""" - if h[:4] == '\x59\xA6\x6A\x95': - return 'rast' - -tests.append(test_rast) - -def test_xbm(h, f): - """X bitmap (X10 or X11)""" - s = '#define ' - if h[:len(s)] == s: - return 'xbm' - -tests.append(test_xbm) - -def test_bmp(h, f): - if h[:2] == 'BM': - return 'bmp' - -tests.append(test_bmp) - -#--------------------# -# Small test program # -#--------------------# - -def test(): - import sys - recursive = 0 - if sys.argv[1:] and sys.argv[1] == '-r': - del sys.argv[1:2] - recursive = 1 - try: - if sys.argv[1:]: - testall(sys.argv[1:], recursive, 1) - else: - testall(['.'], recursive, 1) - except KeyboardInterrupt: - sys.stderr.write('\n[Interrupted]\n') - sys.exit(1) - -def testall(list, recursive, toplevel): - import sys - import os - for filename in list: - if os.path.isdir(filename): - print filename + '/:', - if recursive or toplevel: - print 'recursing down:' - import glob - names = glob.glob(os.path.join(filename, '*')) - testall(names, recursive, 0) - else: - print '*** directory (use -r) ***' - else: - print filename + ':', - sys.stdout.flush() - try: - print what(filename) - except IOError: - print '*** not found ***' diff --git a/python/Lib/importlib/__init__.py b/python/Lib/importlib/__init__.py deleted file mode 100644 index ad31a1ac47..0000000000 --- a/python/Lib/importlib/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Backport of importlib.import_module from 3.x.""" -# While not critical (and in no way guaranteed!), it would be nice to keep this -# code compatible with Python 2.3. 
-import sys - -def _resolve_name(name, package, level): - """Return the absolute name of the module to be imported.""" - if not hasattr(package, 'rindex'): - raise ValueError("'package' not set to a string") - dot = len(package) - for x in xrange(level, 1, -1): - try: - dot = package.rindex('.', 0, dot) - except ValueError: - raise ValueError("attempted relative import beyond top-level " - "package") - return "%s.%s" % (package[:dot], name) - - -def import_module(name, package=None): - """Import a module. - - The 'package' argument is required when performing a relative import. It - specifies the package to use as the anchor point from which to resolve the - relative import to an absolute import. - - """ - if name.startswith('.'): - if not package: - raise TypeError("relative imports require the 'package' argument") - level = 0 - for character in name: - if character != '.': - break - level += 1 - name = _resolve_name(name[level:], package, level) - __import__(name) - return sys.modules[name] diff --git a/python/Lib/imputil.py b/python/Lib/imputil.py deleted file mode 100755 index a5fa6ea4f7..0000000000 --- a/python/Lib/imputil.py +++ /dev/null @@ -1,725 +0,0 @@ -""" -Import utilities - -Exported classes: - ImportManager Manage the import process - - Importer Base class for replacing standard import functions - BuiltinImporter Emulate the import mechanism for builtin and frozen modules - - DynLoadSuffixImporter -""" -from warnings import warnpy3k -warnpy3k("the imputil module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -# note: avoid importing non-builtin modules -import imp ### not available in Jython? -import sys -import __builtin__ - -# for the DirectoryImporter -import struct -import marshal - -__all__ = ["ImportManager","Importer","BuiltinImporter"] - -_StringType = type('') -_ModuleType = type(sys) ### doesn't work in Jython... - -class ImportManager: - "Manage the import process." 
- - def install(self, namespace=vars(__builtin__)): - "Install this ImportManager into the specified namespace." - - if isinstance(namespace, _ModuleType): - namespace = vars(namespace) - - # Note: we have no notion of "chaining" - - # Record the previous import hook, then install our own. - self.previous_importer = namespace['__import__'] - self.namespace = namespace - namespace['__import__'] = self._import_hook - - ### fix this - #namespace['reload'] = self._reload_hook - - def uninstall(self): - "Restore the previous import mechanism." - self.namespace['__import__'] = self.previous_importer - - def add_suffix(self, suffix, importFunc): - assert hasattr(importFunc, '__call__') - self.fs_imp.add_suffix(suffix, importFunc) - - ###################################################################### - # - # PRIVATE METHODS - # - - clsFilesystemImporter = None - - def __init__(self, fs_imp=None): - # we're definitely going to be importing something in the future, - # so let's just load the OS-related facilities. - if not _os_stat: - _os_bootstrap() - - # This is the Importer that we use for grabbing stuff from the - # filesystem. It defines one more method (import_from_dir) for our use. - if fs_imp is None: - cls = self.clsFilesystemImporter or _FilesystemImporter - fs_imp = cls() - self.fs_imp = fs_imp - - # Initialize the set of suffixes that we recognize and import. 
- # The default will import dynamic-load modules first, followed by - # .py files (or a .py file's cached bytecode) - for desc in imp.get_suffixes(): - if desc[2] == imp.C_EXTENSION: - self.add_suffix(desc[0], - DynLoadSuffixImporter(desc).import_file) - self.add_suffix('.py', py_suffix_importer) - - def _import_hook(self, fqname, globals=None, locals=None, fromlist=None): - """Python calls this hook to locate and import a module.""" - - parts = fqname.split('.') - - # determine the context of this import - parent = self._determine_import_context(globals) - - # if there is a parent, then its importer should manage this import - if parent: - module = parent.__importer__._do_import(parent, parts, fromlist) - if module: - return module - - # has the top module already been imported? - try: - top_module = sys.modules[parts[0]] - except KeyError: - - # look for the topmost module - top_module = self._import_top_module(parts[0]) - if not top_module: - # the topmost module wasn't found at all. - raise ImportError, 'No module named ' + fqname - - # fast-path simple imports - if len(parts) == 1: - if not fromlist: - return top_module - - if not top_module.__dict__.get('__ispkg__'): - # __ispkg__ isn't defined (the module was not imported by us), - # or it is zero. - # - # In the former case, there is no way that we could import - # sub-modules that occur in the fromlist (but we can't raise an - # error because it may just be names) because we don't know how - # to deal with packages that were imported by other systems. - # - # In the latter case (__ispkg__ == 0), there can't be any sub- - # modules present, so we can just return. - # - # In both cases, since len(parts) == 1, the top_module is also - # the "bottom" which is the defined return when a fromlist - # exists. 
- return top_module - - importer = top_module.__dict__.get('__importer__') - if importer: - return importer._finish_import(top_module, parts[1:], fromlist) - - # Grrr, some people "import os.path" or do "from os.path import ..." - if len(parts) == 2 and hasattr(top_module, parts[1]): - if fromlist: - return getattr(top_module, parts[1]) - else: - return top_module - - # If the importer does not exist, then we have to bail. A missing - # importer means that something else imported the module, and we have - # no knowledge of how to get sub-modules out of the thing. - raise ImportError, 'No module named ' + fqname - - def _determine_import_context(self, globals): - """Returns the context in which a module should be imported. - - The context could be a loaded (package) module and the imported module - will be looked for within that package. The context could also be None, - meaning there is no context -- the module should be looked for as a - "top-level" module. - """ - - if not globals or not globals.get('__importer__'): - # globals does not refer to one of our modules or packages. That - # implies there is no relative import context (as far as we are - # concerned), and it should just pick it off the standard path. - return None - - # The globals refer to a module or package of ours. It will define - # the context of the new import. Get the module/package fqname. 
- parent_fqname = globals['__name__'] - - # if a package is performing the import, then return itself (imports - # refer to pkg contents) - if globals['__ispkg__']: - parent = sys.modules[parent_fqname] - assert globals is parent.__dict__ - return parent - - i = parent_fqname.rfind('.') - - # a module outside of a package has no particular import context - if i == -1: - return None - - # if a module in a package is performing the import, then return the - # package (imports refer to siblings) - parent_fqname = parent_fqname[:i] - parent = sys.modules[parent_fqname] - assert parent.__name__ == parent_fqname - return parent - - def _import_top_module(self, name): - # scan sys.path looking for a location in the filesystem that contains - # the module, or an Importer object that can import the module. - for item in sys.path: - if isinstance(item, _StringType): - module = self.fs_imp.import_from_dir(item, name) - else: - module = item.import_top(name) - if module: - return module - return None - - def _reload_hook(self, module): - "Python calls this hook to reload a module." - - # reloading of a module may or may not be possible (depending on the - # importer), but at least we can validate that it's ours to reload - importer = module.__dict__.get('__importer__') - if not importer: - ### oops. now what... - pass - - # okay. it is using the imputil system, and we must delegate it, but - # we don't know what to do (yet) - ### we should blast the module dict and do another get_code(). need to - ### flesh this out and add proper docco... - raise SystemError, "reload not yet implemented" - - -class Importer: - "Base class for replacing standard import functions." - - def import_top(self, name): - "Import a top-level module." 
- return self._import_one(None, name, name) - - ###################################################################### - # - # PRIVATE METHODS - # - def _finish_import(self, top, parts, fromlist): - # if "a.b.c" was provided, then load the ".b.c" portion down from - # below the top-level module. - bottom = self._load_tail(top, parts) - - # if the form is "import a.b.c", then return "a" - if not fromlist: - # no fromlist: return the top of the import tree - return top - - # the top module was imported by self. - # - # this means that the bottom module was also imported by self (just - # now, or in the past and we fetched it from sys.modules). - # - # since we imported/handled the bottom module, this means that we can - # also handle its fromlist (and reliably use __ispkg__). - - # if the bottom node is a package, then (potentially) import some - # modules. - # - # note: if it is not a package, then "fromlist" refers to names in - # the bottom module rather than modules. - # note: for a mix of names and modules in the fromlist, we will - # import all modules and insert those into the namespace of - # the package module. Python will pick up all fromlist names - # from the bottom (package) module; some will be modules that - # we imported and stored in the namespace, others are expected - # to be present already. - if bottom.__ispkg__: - self._import_fromlist(bottom, fromlist) - - # if the form is "from a.b import c, d" then return "b" - return bottom - - def _import_one(self, parent, modname, fqname): - "Import a single module." - - # has the module already been imported? 
- try: - return sys.modules[fqname] - except KeyError: - pass - - # load the module's code, or fetch the module itself - result = self.get_code(parent, modname, fqname) - if result is None: - return None - - module = self._process_result(result, fqname) - - # insert the module into its parent - if parent: - setattr(parent, modname, module) - return module - - def _process_result(self, result, fqname): - ispkg, code, values = result - # did get_code() return an actual module? (rather than a code object) - is_module = isinstance(code, _ModuleType) - - # use the returned module, or create a new one to exec code into - if is_module: - module = code - else: - module = imp.new_module(fqname) - - ### record packages a bit differently?? - module.__importer__ = self - module.__ispkg__ = ispkg - - # insert additional values into the module (before executing the code) - module.__dict__.update(values) - - # the module is almost ready... make it visible - sys.modules[fqname] = module - - # execute the code within the module's namespace - if not is_module: - try: - exec code in module.__dict__ - except: - if fqname in sys.modules: - del sys.modules[fqname] - raise - - # fetch from sys.modules instead of returning module directly. - # also make module's __name__ agree with fqname, in case - # the "exec code in module.__dict__" played games on us. - module = sys.modules[fqname] - module.__name__ = fqname - return module - - def _load_tail(self, m, parts): - """Import the rest of the modules, down from the top-level module. - - Returns the last module in the dotted list of modules. - """ - for part in parts: - fqname = "%s.%s" % (m.__name__, part) - m = self._import_one(m, part, fqname) - if not m: - raise ImportError, "No module named " + fqname - return m - - def _import_fromlist(self, package, fromlist): - 'Import any sub-modules in the "from" list.' - - # if '*' is present in the fromlist, then look for the '__all__' - # variable to find additional items (modules) to import. 
- if '*' in fromlist: - fromlist = list(fromlist) + \ - list(package.__dict__.get('__all__', [])) - - for sub in fromlist: - # if the name is already present, then don't try to import it (it - # might not be a module!). - if sub != '*' and not hasattr(package, sub): - subname = "%s.%s" % (package.__name__, sub) - submod = self._import_one(package, sub, subname) - if not submod: - raise ImportError, "cannot import name " + subname - - def _do_import(self, parent, parts, fromlist): - """Attempt to import the module relative to parent. - - This method is used when the import context specifies that - imported the parent module. - """ - top_name = parts[0] - top_fqname = parent.__name__ + '.' + top_name - top_module = self._import_one(parent, top_name, top_fqname) - if not top_module: - # this importer and parent could not find the module (relatively) - return None - - return self._finish_import(top_module, parts[1:], fromlist) - - ###################################################################### - # - # METHODS TO OVERRIDE - # - def get_code(self, parent, modname, fqname): - """Find and retrieve the code for the given module. - - parent specifies a parent module to define a context for importing. It - may be None, indicating no particular context for the search. - - modname specifies a single module (not dotted) within the parent. - - fqname specifies the fully-qualified module name. This is a - (potentially) dotted name from the "root" of the module namespace - down to the modname. - If there is no parent, then modname==fqname. - - This method should return None, or a 3-tuple. - - * If the module was not found, then None should be returned. - - * The first item of the 2- or 3-tuple should be the integer 0 or 1, - specifying whether the module that was found is a package or not. - - * The second item is the code object for the module (it will be - executed within the new module's namespace). This item can also - be a fully-loaded module object (e.g. 
loaded from a shared lib). - - * The third item is a dictionary of name/value pairs that will be - inserted into new module before the code object is executed. This - is provided in case the module's code expects certain values (such - as where the module was found). When the second item is a module - object, then these names/values will be inserted *after* the module - has been loaded/initialized. - """ - raise RuntimeError, "get_code not implemented" - - -###################################################################### -# -# Some handy stuff for the Importers -# - -# byte-compiled file suffix character -_suffix_char = __debug__ and 'c' or 'o' - -# byte-compiled file suffix -_suffix = '.py' + _suffix_char - -def _compile(pathname, timestamp): - """Compile (and cache) a Python source file. - - The file specified by is compiled to a code object and - returned. - - Presuming the appropriate privileges exist, the bytecodes will be - saved back to the filesystem for future imports. The source file's - modification timestamp must be provided as a Long value. - """ - codestring = open(pathname, 'rU').read() - if codestring and codestring[-1] != '\n': - codestring = codestring + '\n' - code = __builtin__.compile(codestring, pathname, 'exec') - - # try to cache the compiled code - try: - f = open(pathname + _suffix_char, 'wb') - except IOError: - pass - else: - f.write('\0\0\0\0') - f.write(struct.pack('= t_py: - f = open(file, 'rb') - if f.read(4) == imp.get_magic(): - t = struct.unpack('>> import foo -# >>> foo -# -# -# ---- revamped import mechanism -# >>> import imputil -# >>> imputil._test_revamp() -# >>> import foo -# >>> foo -# -# -# -# from MAL: -# should BuiltinImporter exist in sys.path or hard-wired in ImportManager? -# need __path__ processing -# performance -# move chaining to a subclass [gjs: it's been nuked] -# deinstall should be possible -# query mechanism needed: is a specific Importer installed? 
-# py/pyc/pyo piping hooks to filter/process these files -# wish list: -# distutils importer hooked to list of standard Internet repositories -# module->file location mapper to speed FS-based imports -# relative imports -# keep chaining so that it can play nice with other import hooks -# -# from Gordon: -# push MAL's mapper into sys.path[0] as a cache (hard-coded for apps) -# -# from Guido: -# need to change sys.* references for rexec environs -# need hook for MAL's walk-me-up import strategy, or Tim's absolute strategy -# watch out for sys.modules[...] is None -# flag to force absolute imports? (speeds _determine_import_context and -# checking for a relative module) -# insert names of archives into sys.path (see quote below) -# note: reload does NOT blast module dict -# shift import mechanisms and policies around; provide for hooks, overrides -# (see quote below) -# add get_source stuff -# get_topcode and get_subcode -# CRLF handling in _compile -# race condition in _compile -# refactoring of os.py to deal with _os_bootstrap problem -# any special handling to do for importing a module with a SyntaxError? -# (e.g. clean up the traceback) -# implement "domain" for path-type functionality using pkg namespace -# (rather than FS-names like __path__) -# don't use the word "private"... maybe "internal" -# -# -# Guido's comments on sys.path caching: -# -# We could cache this in a dictionary: the ImportManager can have a -# cache dict mapping pathnames to importer objects, and a separate -# method for coming up with an importer given a pathname that's not yet -# in the cache. The method should do a stat and/or look at the -# extension to decide which importer class to use; you can register new -# importer classes by registering a suffix or a Boolean function, plus a -# class. If you register a new importer class, the cache is zapped. 
-# The cache is independent from sys.path (but maintained per -# ImportManager instance) so that rearrangements of sys.path do the -# right thing. If a path is dropped from sys.path the corresponding -# cache entry is simply no longer used. -# -# My/Guido's comments on factoring ImportManager and Importer: -# -# > However, we still have a tension occurring here: -# > -# > 1) implementing policy in ImportManager assists in single-point policy -# > changes for app/rexec situations -# > 2) implementing policy in Importer assists in package-private policy -# > changes for normal, operating conditions -# > -# > I'll see if I can sort out a way to do this. Maybe the Importer class will -# > implement the methods (which can be overridden to change policy) by -# > delegating to ImportManager. -# -# Maybe also think about what kind of policies an Importer would be -# likely to want to change. I have a feeling that a lot of the code -# there is actually not so much policy but a *necessity* to get things -# working given the calling conventions for the __import__ hook: whether -# to return the head or tail of a dotted name, or when to do the "finish -# fromlist" stuff. -# diff --git a/python/Lib/inspect.py b/python/Lib/inspect.py deleted file mode 100755 index 392c587e88..0000000000 --- a/python/Lib/inspect.py +++ /dev/null @@ -1,1063 +0,0 @@ -# -*- coding: iso-8859-1 -*- -"""Get useful information from live Python objects. - -This module encapsulates the interface provided by the internal special -attributes (func_*, co_*, im_*, tb_*, etc.) in a friendlier fashion. -It also provides some help for examining source code and class layout. 
- -Here are some of the useful functions provided by this module: - - ismodule(), isclass(), ismethod(), isfunction(), isgeneratorfunction(), - isgenerator(), istraceback(), isframe(), iscode(), isbuiltin(), - isroutine() - check object types - getmembers() - get members of an object that satisfy a given condition - - getfile(), getsourcefile(), getsource() - find an object's source code - getdoc(), getcomments() - get documentation on an object - getmodule() - determine the module that an object came from - getclasstree() - arrange classes so as to represent their hierarchy - - getargspec(), getargvalues(), getcallargs() - get info about function arguments - formatargspec(), formatargvalues() - format an argument spec - getouterframes(), getinnerframes() - get info about frames - currentframe() - get the current stack frame - stack(), trace() - get info about frames on the stack or in a traceback -""" - -# This module is in the public domain. No warranties. - -__author__ = 'Ka-Ping Yee ' -__date__ = '1 Jan 2001' - -import sys -import os -import types -import string -import re -import dis -import imp -import tokenize -import linecache -from operator import attrgetter -from collections import namedtuple - -# These constants are from Include/code.h. -CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 -CO_NESTED, CO_GENERATOR, CO_NOFREE = 0x10, 0x20, 0x40 -# See Include/object.h -TPFLAGS_IS_ABSTRACT = 1 << 20 - -# ----------------------------------------------------------- type-checking -def ismodule(object): - """Return true if the object is a module. - - Module objects provide these attributes: - __doc__ documentation string - __file__ filename (missing for built-in modules)""" - return isinstance(object, types.ModuleType) - -def isclass(object): - """Return true if the object is a class. 
- - Class objects provide these attributes: - __doc__ documentation string - __module__ name of module in which this class was defined""" - return isinstance(object, (type, types.ClassType)) - -def ismethod(object): - """Return true if the object is an instance method. - - Instance method objects provide these attributes: - __doc__ documentation string - __name__ name with which this method was defined - im_class class object in which this method belongs - im_func function object containing implementation of method - im_self instance to which this method is bound, or None""" - return isinstance(object, types.MethodType) - -def ismethoddescriptor(object): - """Return true if the object is a method descriptor. - - But not if ismethod() or isclass() or isfunction() are true. - - This is new in Python 2.2, and, for example, is true of int.__add__. - An object passing this test has a __get__ attribute but not a __set__ - attribute, but beyond that the set of attributes varies. __name__ is - usually sensible, and __doc__ often is. - - Methods implemented via descriptors that also pass one of the other - tests return false from the ismethoddescriptor() test, simply because - the other tests promise more -- you can, e.g., count on having the - im_func attribute (etc) when an object passes ismethod().""" - return (hasattr(object, "__get__") - and not hasattr(object, "__set__") # else it's a data descriptor - and not ismethod(object) # mutual exclusion - and not isfunction(object) - and not isclass(object)) - -def isdatadescriptor(object): - """Return true if the object is a data descriptor. - - Data descriptors have both a __get__ and a __set__ attribute. Examples are - properties (defined in Python) and getsets and members (defined in C). 
- Typically, data descriptors will also have __name__ and __doc__ attributes - (properties, getsets, and members have both of these attributes), but this - is not guaranteed.""" - return (hasattr(object, "__set__") and hasattr(object, "__get__")) - -if hasattr(types, 'MemberDescriptorType'): - # CPython and equivalent - def ismemberdescriptor(object): - """Return true if the object is a member descriptor. - - Member descriptors are specialized descriptors defined in extension - modules.""" - return isinstance(object, types.MemberDescriptorType) -else: - # Other implementations - def ismemberdescriptor(object): - """Return true if the object is a member descriptor. - - Member descriptors are specialized descriptors defined in extension - modules.""" - return False - -if hasattr(types, 'GetSetDescriptorType'): - # CPython and equivalent - def isgetsetdescriptor(object): - """Return true if the object is a getset descriptor. - - getset descriptors are specialized descriptors defined in extension - modules.""" - return isinstance(object, types.GetSetDescriptorType) -else: - # Other implementations - def isgetsetdescriptor(object): - """Return true if the object is a getset descriptor. - - getset descriptors are specialized descriptors defined in extension - modules.""" - return False - -def isfunction(object): - """Return true if the object is a user-defined function. - - Function objects provide these attributes: - __doc__ documentation string - __name__ name with which this function was defined - func_code code object containing compiled function bytecode - func_defaults tuple of any default values for arguments - func_doc (same as __doc__) - func_globals global namespace in which this function was defined - func_name (same as __name__)""" - return isinstance(object, types.FunctionType) - -def isgeneratorfunction(object): - """Return true if the object is a user-defined generator function. - - Generator function objects provide the same attributes as functions. 
- See help(isfunction) for a list of attributes.""" - return bool((isfunction(object) or ismethod(object)) and - object.func_code.co_flags & CO_GENERATOR) - -def isgenerator(object): - """Return true if the object is a generator. - - Generator objects provide these attributes: - __iter__ defined to support iteration over container - close raises a new GeneratorExit exception inside the - generator to terminate the iteration - gi_code code object - gi_frame frame object or possibly None once the generator has - been exhausted - gi_running set to 1 when generator is executing, 0 otherwise - next return the next item from the container - send resumes the generator and "sends" a value that becomes - the result of the current yield-expression - throw used to raise an exception inside the generator""" - return isinstance(object, types.GeneratorType) - -def istraceback(object): - """Return true if the object is a traceback. - - Traceback objects provide these attributes: - tb_frame frame object at this level - tb_lasti index of last attempted instruction in bytecode - tb_lineno current line number in Python source code - tb_next next inner traceback object (called by this level)""" - return isinstance(object, types.TracebackType) - -def isframe(object): - """Return true if the object is a frame object. 
- - Frame objects provide these attributes: - f_back next outer frame object (this frame's caller) - f_builtins built-in namespace seen by this frame - f_code code object being executed in this frame - f_exc_traceback traceback if raised in this frame, or None - f_exc_type exception type if raised in this frame, or None - f_exc_value exception value if raised in this frame, or None - f_globals global namespace seen by this frame - f_lasti index of last attempted instruction in bytecode - f_lineno current line number in Python source code - f_locals local namespace seen by this frame - f_restricted 0 or 1 if frame is in restricted execution mode - f_trace tracing function for this frame, or None""" - return isinstance(object, types.FrameType) - -def iscode(object): - """Return true if the object is a code object. - - Code objects provide these attributes: - co_argcount number of arguments (not including * or ** args) - co_code string of raw compiled bytecode - co_consts tuple of constants used in the bytecode - co_filename name of file in which this code object was created - co_firstlineno number of first line in Python source code - co_flags bitmap: 1=optimized | 2=newlocals | 4=*arg | 8=**arg - co_lnotab encoded mapping of line numbers to bytecode indices - co_name name with which this code object was defined - co_names tuple of names of local variables - co_nlocals number of local variables - co_stacksize virtual machine stack space required - co_varnames tuple of names of arguments and local variables""" - return isinstance(object, types.CodeType) - -def isbuiltin(object): - """Return true if the object is a built-in function or method. 
- - Built-in functions and methods provide these attributes: - __doc__ documentation string - __name__ original name of this function or method - __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) - -def isroutine(object): - """Return true if the object is any kind of function or method.""" - return (isbuiltin(object) - or isfunction(object) - or ismethod(object) - or ismethoddescriptor(object)) - -def isabstract(object): - """Return true if the object is an abstract base class (ABC).""" - return bool(isinstance(object, type) and object.__flags__ & TPFLAGS_IS_ABSTRACT) - -def getmembers(object, predicate=None): - """Return all members of an object as (name, value) pairs sorted by name. - Optionally, only return members that satisfy a given predicate.""" - results = [] - for key in dir(object): - try: - value = getattr(object, key) - except AttributeError: - continue - if not predicate or predicate(value): - results.append((key, value)) - results.sort() - return results - -Attribute = namedtuple('Attribute', 'name kind defining_class object') - -def classify_class_attrs(cls): - """Return list of attribute-descriptor tuples. - - For each name in dir(cls), the return list contains a 4-tuple - with these elements: - - 0. The name (a string). - - 1. The kind of attribute this is, one of these strings: - 'class method' created via classmethod() - 'static method' created via staticmethod() - 'property' created via property() - 'method' any other flavor of method - 'data' not a method - - 2. The class which defined this attribute (a class). - - 3. The object as obtained directly from the defining class's - __dict__, not via getattr. This is especially important for - data attributes: C.data is just a data object, but - C.__dict__['data'] may be a data descriptor with additional - info, like a __doc__ string. 
- """ - - mro = getmro(cls) - names = dir(cls) - result = [] - for name in names: - # Get the object associated with the name, and where it was defined. - # Getting an obj from the __dict__ sometimes reveals more than - # using getattr. Static and class methods are dramatic examples. - # Furthermore, some objects may raise an Exception when fetched with - # getattr(). This is the case with some descriptors (bug #1785). - # Thus, we only use getattr() as a last resort. - homecls = None - for base in (cls,) + mro: - if name in base.__dict__: - obj = base.__dict__[name] - homecls = base - break - else: - obj = getattr(cls, name) - homecls = getattr(obj, "__objclass__", homecls) - - # Classify the object. - if isinstance(obj, staticmethod): - kind = "static method" - elif isinstance(obj, classmethod): - kind = "class method" - elif isinstance(obj, property): - kind = "property" - elif ismethoddescriptor(obj): - kind = "method" - elif isdatadescriptor(obj): - kind = "data" - else: - obj_via_getattr = getattr(cls, name) - if (ismethod(obj_via_getattr) or - ismethoddescriptor(obj_via_getattr)): - kind = "method" - else: - kind = "data" - obj = obj_via_getattr - - result.append(Attribute(name, kind, homecls, obj)) - - return result - -# ----------------------------------------------------------- class helpers -def _searchbases(cls, accum): - # Simulate the "classic class" search order. - if cls in accum: - return - accum.append(cls) - for base in cls.__bases__: - _searchbases(base, accum) - -def getmro(cls): - "Return tuple of base classes (including cls) in method resolution order." 
- if hasattr(cls, "__mro__"): - return cls.__mro__ - else: - result = [] - _searchbases(cls, result) - return tuple(result) - -# -------------------------------------------------- source code extraction -def indentsize(line): - """Return the indent size, in spaces, at the start of a line of text.""" - expline = string.expandtabs(line) - return len(expline) - len(string.lstrip(expline)) - -def getdoc(object): - """Get the documentation string for an object. - - All tabs are expanded to spaces. To clean up docstrings that are - indented to line up with blocks of code, any whitespace than can be - uniformly removed from the second line onwards is removed.""" - try: - doc = object.__doc__ - except AttributeError: - return None - if not isinstance(doc, types.StringTypes): - return None - return cleandoc(doc) - -def cleandoc(doc): - """Clean up indentation from docstrings. - - Any whitespace that can be uniformly removed from the second line - onwards is removed.""" - try: - lines = string.split(string.expandtabs(doc), '\n') - except UnicodeError: - return None - else: - # Find minimum indentation of any non-blank lines after first line. - margin = sys.maxint - for line in lines[1:]: - content = len(string.lstrip(line)) - if content: - indent = len(line) - content - margin = min(margin, indent) - # Remove indentation. - if lines: - lines[0] = lines[0].lstrip() - if margin < sys.maxint: - for i in range(1, len(lines)): lines[i] = lines[i][margin:] - # Remove any trailing or leading blank lines. 
- while lines and not lines[-1]: - lines.pop() - while lines and not lines[0]: - lines.pop(0) - return string.join(lines, '\n') - -def getfile(object): - """Work out which source or compiled file an object was defined in.""" - if ismodule(object): - if hasattr(object, '__file__'): - return object.__file__ - raise TypeError('{!r} is a built-in module'.format(object)) - if isclass(object): - object = sys.modules.get(object.__module__) - if hasattr(object, '__file__'): - return object.__file__ - raise TypeError('{!r} is a built-in class'.format(object)) - if ismethod(object): - object = object.im_func - if isfunction(object): - object = object.func_code - if istraceback(object): - object = object.tb_frame - if isframe(object): - object = object.f_code - if iscode(object): - return object.co_filename - raise TypeError('{!r} is not a module, class, method, ' - 'function, traceback, frame, or code object'.format(object)) - -ModuleInfo = namedtuple('ModuleInfo', 'name suffix mode module_type') - -def getmoduleinfo(path): - """Get the module name, suffix, mode, and module type for a given file.""" - filename = os.path.basename(path) - suffixes = map(lambda info: - (-len(info[0]), info[0], info[1], info[2]), - imp.get_suffixes()) - suffixes.sort() # try longest suffixes first, in case they overlap - for neglen, suffix, mode, mtype in suffixes: - if filename[neglen:] == suffix: - return ModuleInfo(filename[:neglen], suffix, mode, mtype) - -def getmodulename(path): - """Return the module name for a given file, or None.""" - info = getmoduleinfo(path) - if info: return info[0] - -def getsourcefile(object): - """Return the filename that can be used to locate an object's source. - Return None if no way can be identified to get the source. 
- """ - filename = getfile(object) - if string.lower(filename[-4:]) in ('.pyc', '.pyo'): - filename = filename[:-4] + '.py' - for suffix, mode, kind in imp.get_suffixes(): - if 'b' in mode and string.lower(filename[-len(suffix):]) == suffix: - # Looks like a binary file. We want to only return a text file. - return None - if os.path.exists(filename): - return filename - # only return a non-existent filename if the module has a PEP 302 loader - if hasattr(getmodule(object, filename), '__loader__'): - return filename - # or it is in the linecache - if filename in linecache.cache: - return filename - -def getabsfile(object, _filename=None): - """Return an absolute path to the source or compiled file for an object. - - The idea is for each object to have a unique origin, so this routine - normalizes the result as much as possible.""" - if _filename is None: - _filename = getsourcefile(object) or getfile(object) - return os.path.normcase(os.path.abspath(_filename)) - -modulesbyfile = {} -_filesbymodname = {} - -def getmodule(object, _filename=None): - """Return the module an object was defined in, or None if not found.""" - if ismodule(object): - return object - if hasattr(object, '__module__'): - return sys.modules.get(object.__module__) - # Try the filename to modulename cache - if _filename is not None and _filename in modulesbyfile: - return sys.modules.get(modulesbyfile[_filename]) - # Try the cache again with the absolute file name - try: - file = getabsfile(object, _filename) - except TypeError: - return None - if file in modulesbyfile: - return sys.modules.get(modulesbyfile[file]) - # Update the filename to module name cache and check yet again - # Copy sys.modules in order to cope with changes while iterating - for modname, module in sys.modules.items(): - if ismodule(module) and hasattr(module, '__file__'): - f = module.__file__ - if f == _filesbymodname.get(modname, None): - # Have already mapped this module, so skip it - continue - _filesbymodname[modname] = 
f - f = getabsfile(module) - # Always map to the name the module knows itself by - modulesbyfile[f] = modulesbyfile[ - os.path.realpath(f)] = module.__name__ - if file in modulesbyfile: - return sys.modules.get(modulesbyfile[file]) - # Check the main module - main = sys.modules['__main__'] - if not hasattr(object, '__name__'): - return None - if hasattr(main, object.__name__): - mainobject = getattr(main, object.__name__) - if mainobject is object: - return main - # Check builtins - builtin = sys.modules['__builtin__'] - if hasattr(builtin, object.__name__): - builtinobject = getattr(builtin, object.__name__) - if builtinobject is object: - return builtin - -def findsource(object): - """Return the entire source file and starting line number for an object. - - The argument may be a module, class, method, function, traceback, frame, - or code object. The source code is returned as a list of all the lines - in the file and the line number indexes a line in that list. An IOError - is raised if the source code cannot be retrieved.""" - - file = getfile(object) - sourcefile = getsourcefile(object) - if not sourcefile and file[:1] + file[-1:] != '<>': - raise IOError('source code not available') - file = sourcefile if sourcefile else file - - module = getmodule(object, file) - if module: - lines = linecache.getlines(file, module.__dict__) - else: - lines = linecache.getlines(file) - if not lines: - raise IOError('could not get source code') - - if ismodule(object): - return lines, 0 - - if isclass(object): - name = object.__name__ - pat = re.compile(r'^(\s*)class\s*' + name + r'\b') - # make some effort to find the best matching class definition: - # use the one with the least indentation, which is the one - # that's most probably not inside a function definition. 
- candidates = [] - for i in range(len(lines)): - match = pat.match(lines[i]) - if match: - # if it's at toplevel, it's already the best one - if lines[i][0] == 'c': - return lines, i - # else add whitespace to candidate list - candidates.append((match.group(1), i)) - if candidates: - # this will sort by whitespace, and by line number, - # less whitespace first - candidates.sort() - return lines, candidates[0][1] - else: - raise IOError('could not find class definition') - - if ismethod(object): - object = object.im_func - if isfunction(object): - object = object.func_code - if istraceback(object): - object = object.tb_frame - if isframe(object): - object = object.f_code - if iscode(object): - if not hasattr(object, 'co_firstlineno'): - raise IOError('could not find function definition') - lnum = object.co_firstlineno - 1 - pat = re.compile(r'^(\s*def\s)|(.*(? 0: - if pat.match(lines[lnum]): break - lnum = lnum - 1 - return lines, lnum - raise IOError('could not find code object') - -def getcomments(object): - """Get lines of comments immediately preceding an object's source code. - - Returns None when source can't be found. - """ - try: - lines, lnum = findsource(object) - except (IOError, TypeError): - return None - - if ismodule(object): - # Look for a comment block at the top of the file. - start = 0 - if lines and lines[0][:2] == '#!': start = 1 - while start < len(lines) and string.strip(lines[start]) in ('', '#'): - start = start + 1 - if start < len(lines) and lines[start][:1] == '#': - comments = [] - end = start - while end < len(lines) and lines[end][:1] == '#': - comments.append(string.expandtabs(lines[end])) - end = end + 1 - return string.join(comments, '') - - # Look for a preceding block of comments at the same indentation. 
- elif lnum > 0: - indent = indentsize(lines[lnum]) - end = lnum - 1 - if end >= 0 and string.lstrip(lines[end])[:1] == '#' and \ - indentsize(lines[end]) == indent: - comments = [string.lstrip(string.expandtabs(lines[end]))] - if end > 0: - end = end - 1 - comment = string.lstrip(string.expandtabs(lines[end])) - while comment[:1] == '#' and indentsize(lines[end]) == indent: - comments[:0] = [comment] - end = end - 1 - if end < 0: break - comment = string.lstrip(string.expandtabs(lines[end])) - while comments and string.strip(comments[0]) == '#': - comments[:1] = [] - while comments and string.strip(comments[-1]) == '#': - comments[-1:] = [] - return string.join(comments, '') - -class EndOfBlock(Exception): pass - -class BlockFinder: - """Provide a tokeneater() method to detect the end of a code block.""" - def __init__(self): - self.indent = 0 - self.islambda = False - self.started = False - self.passline = False - self.last = 1 - - def tokeneater(self, type, token, srow_scol, erow_ecol, line): - srow, scol = srow_scol - erow, ecol = erow_ecol - if not self.started: - # look for the first "def", "class" or "lambda" - if token in ("def", "class", "lambda"): - if token == "lambda": - self.islambda = True - self.started = True - self.passline = True # skip to the end of the line - elif type == tokenize.NEWLINE: - self.passline = False # stop skipping when a NEWLINE is seen - self.last = srow - if self.islambda: # lambdas always end at the first NEWLINE - raise EndOfBlock - elif self.passline: - pass - elif type == tokenize.INDENT: - self.indent = self.indent + 1 - self.passline = True - elif type == tokenize.DEDENT: - self.indent = self.indent - 1 - # the end of matching indent/dedent pairs end a block - # (note that this only works for "def"/"class" blocks, - # not e.g. 
for "if: else:" or "try: finally:" blocks) - if self.indent <= 0: - raise EndOfBlock - elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL): - # any other token on the same indentation level end the previous - # block as well, except the pseudo-tokens COMMENT and NL. - raise EndOfBlock - -def getblock(lines): - """Extract the block of code at the top of the given list of lines.""" - blockfinder = BlockFinder() - try: - tokenize.tokenize(iter(lines).next, blockfinder.tokeneater) - except (EndOfBlock, IndentationError): - pass - return lines[:blockfinder.last] - -def getsourcelines(object): - """Return a list of source lines and starting line number for an object. - - The argument may be a module, class, method, function, traceback, frame, - or code object. The source code is returned as a list of the lines - corresponding to the object and the line number indicates where in the - original source file the first line of code was found. An IOError is - raised if the source code cannot be retrieved.""" - lines, lnum = findsource(object) - - if ismodule(object): return lines, 0 - else: return getblock(lines[lnum:]), lnum + 1 - -def getsource(object): - """Return the text of the source code for an object. - - The argument may be a module, class, method, function, traceback, frame, - or code object. The source code is returned as a single string. 
An - IOError is raised if the source code cannot be retrieved.""" - lines, lnum = getsourcelines(object) - return string.join(lines, '') - -# --------------------------------------------------- class tree extraction -def walktree(classes, children, parent): - """Recursive helper function for getclasstree().""" - results = [] - classes.sort(key=attrgetter('__module__', '__name__')) - for c in classes: - results.append((c, c.__bases__)) - if c in children: - results.append(walktree(children[c], children, c)) - return results - -def getclasstree(classes, unique=0): - """Arrange the given list of classes into a hierarchy of nested lists. - - Where a nested list appears, it contains classes derived from the class - whose entry immediately precedes the list. Each entry is a 2-tuple - containing a class and a tuple of its base classes. If the 'unique' - argument is true, exactly one entry appears in the returned structure - for each class in the given list. Otherwise, classes using multiple - inheritance and their descendants will appear multiple times.""" - children = {} - roots = [] - for c in classes: - if c.__bases__: - for parent in c.__bases__: - if not parent in children: - children[parent] = [] - if c not in children[parent]: - children[parent].append(c) - if unique and parent in classes: break - elif c not in roots: - roots.append(c) - for parent in children: - if parent not in classes: - roots.append(parent) - return walktree(roots, children, None) - -# ------------------------------------------------ argument list extraction -Arguments = namedtuple('Arguments', 'args varargs keywords') - -def getargs(co): - """Get information about the arguments accepted by a code object. 
- - Three things are returned: (args, varargs, varkw), where 'args' is - a list of argument names (possibly containing nested lists), and - 'varargs' and 'varkw' are the names of the * and ** arguments or None.""" - - if not iscode(co): - raise TypeError('{!r} is not a code object'.format(co)) - - nargs = co.co_argcount - names = co.co_varnames - args = list(names[:nargs]) - step = 0 - - # The following acrobatics are for anonymous (tuple) arguments. - for i in range(nargs): - if args[i][:1] in ('', '.'): - stack, remain, count = [], [], [] - while step < len(co.co_code): - op = ord(co.co_code[step]) - step = step + 1 - if op >= dis.HAVE_ARGUMENT: - opname = dis.opname[op] - value = ord(co.co_code[step]) + ord(co.co_code[step+1])*256 - step = step + 2 - if opname in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): - remain.append(value) - count.append(value) - elif opname == 'STORE_FAST': - stack.append(names[value]) - - # Special case for sublists of length 1: def foo((bar)) - # doesn't generate the UNPACK_TUPLE bytecode, so if - # `remain` is empty here, we have such a sublist. - if not remain: - stack[0] = [stack[0]] - break - else: - remain[-1] = remain[-1] - 1 - while remain[-1] == 0: - remain.pop() - size = count.pop() - stack[-size:] = [stack[-size:]] - if not remain: break - remain[-1] = remain[-1] - 1 - if not remain: break - args[i] = stack[0] - - varargs = None - if co.co_flags & CO_VARARGS: - varargs = co.co_varnames[nargs] - nargs = nargs + 1 - varkw = None - if co.co_flags & CO_VARKEYWORDS: - varkw = co.co_varnames[nargs] - return Arguments(args, varargs, varkw) - -ArgSpec = namedtuple('ArgSpec', 'args varargs keywords defaults') - -def getargspec(func): - """Get the names and default values of a function's arguments. - - A tuple of four things is returned: (args, varargs, varkw, defaults). - 'args' is a list of the argument names (it may contain nested lists). - 'varargs' and 'varkw' are the names of the * and ** arguments or None. 
- 'defaults' is an n-tuple of the default values of the last n arguments. - """ - - if ismethod(func): - func = func.im_func - if not isfunction(func): - raise TypeError('{!r} is not a Python function'.format(func)) - args, varargs, varkw = getargs(func.func_code) - return ArgSpec(args, varargs, varkw, func.func_defaults) - -ArgInfo = namedtuple('ArgInfo', 'args varargs keywords locals') - -def getargvalues(frame): - """Get information about arguments passed into a particular frame. - - A tuple of four things is returned: (args, varargs, varkw, locals). - 'args' is a list of the argument names (it may contain nested lists). - 'varargs' and 'varkw' are the names of the * and ** arguments or None. - 'locals' is the locals dictionary of the given frame.""" - args, varargs, varkw = getargs(frame.f_code) - return ArgInfo(args, varargs, varkw, frame.f_locals) - -def joinseq(seq): - if len(seq) == 1: - return '(' + seq[0] + ',)' - else: - return '(' + string.join(seq, ', ') + ')' - -def strseq(object, convert, join=joinseq): - """Recursively walk a sequence, stringifying each element.""" - if type(object) in (list, tuple): - return join(map(lambda o, c=convert, j=join: strseq(o, c, j), object)) - else: - return convert(object) - -def formatargspec(args, varargs=None, varkw=None, defaults=None, - formatarg=str, - formatvarargs=lambda name: '*' + name, - formatvarkw=lambda name: '**' + name, - formatvalue=lambda value: '=' + repr(value), - join=joinseq): - """Format an argument spec from the 4 values returned by getargspec. - - The first four arguments are (args, varargs, varkw, defaults). The - other four arguments are the corresponding optional formatting functions - that are called to turn names and values into strings. 
The ninth - argument is an optional function to format the sequence of arguments.""" - specs = [] - if defaults: - firstdefault = len(args) - len(defaults) - for i, arg in enumerate(args): - spec = strseq(arg, formatarg, join) - if defaults and i >= firstdefault: - spec = spec + formatvalue(defaults[i - firstdefault]) - specs.append(spec) - if varargs is not None: - specs.append(formatvarargs(varargs)) - if varkw is not None: - specs.append(formatvarkw(varkw)) - return '(' + string.join(specs, ', ') + ')' - -def formatargvalues(args, varargs, varkw, locals, - formatarg=str, - formatvarargs=lambda name: '*' + name, - formatvarkw=lambda name: '**' + name, - formatvalue=lambda value: '=' + repr(value), - join=joinseq): - """Format an argument spec from the 4 values returned by getargvalues. - - The first four arguments are (args, varargs, varkw, locals). The - next four arguments are the corresponding optional formatting functions - that are called to turn names and values into strings. The ninth - argument is an optional function to format the sequence of arguments.""" - def convert(name, locals=locals, - formatarg=formatarg, formatvalue=formatvalue): - return formatarg(name) + formatvalue(locals[name]) - specs = [] - for i in range(len(args)): - specs.append(strseq(args[i], convert, join)) - if varargs: - specs.append(formatvarargs(varargs) + formatvalue(locals[varargs])) - if varkw: - specs.append(formatvarkw(varkw) + formatvalue(locals[varkw])) - return '(' + string.join(specs, ', ') + ')' - -def getcallargs(func, *positional, **named): - """Get the mapping of arguments to values. - - A dict is returned, with keys the function argument names (including the - names of the * and ** arguments, if any), and values the respective bound - values from 'positional' and 'named'.""" - args, varargs, varkw, defaults = getargspec(func) - f_name = func.__name__ - arg2value = {} - - # The following closures are basically because of tuple parameter unpacking. 
- assigned_tuple_params = [] - def assign(arg, value): - if isinstance(arg, str): - arg2value[arg] = value - else: - assigned_tuple_params.append(arg) - value = iter(value) - for i, subarg in enumerate(arg): - try: - subvalue = next(value) - except StopIteration: - raise ValueError('need more than %d %s to unpack' % - (i, 'values' if i > 1 else 'value')) - assign(subarg,subvalue) - try: - next(value) - except StopIteration: - pass - else: - raise ValueError('too many values to unpack') - def is_assigned(arg): - if isinstance(arg,str): - return arg in arg2value - return arg in assigned_tuple_params - if ismethod(func) and func.im_self is not None: - # implicit 'self' (or 'cls' for classmethods) argument - positional = (func.im_self,) + positional - num_pos = len(positional) - num_total = num_pos + len(named) - num_args = len(args) - num_defaults = len(defaults) if defaults else 0 - for arg, value in zip(args, positional): - assign(arg, value) - if varargs: - if num_pos > num_args: - assign(varargs, positional[-(num_pos-num_args):]) - else: - assign(varargs, ()) - elif 0 < num_args < num_pos: - raise TypeError('%s() takes %s %d %s (%d given)' % ( - f_name, 'at most' if defaults else 'exactly', num_args, - 'arguments' if num_args > 1 else 'argument', num_total)) - elif num_args == 0 and num_total: - if varkw: - if num_pos: - # XXX: We should use num_pos, but Python also uses num_total: - raise TypeError('%s() takes exactly 0 arguments ' - '(%d given)' % (f_name, num_total)) - else: - raise TypeError('%s() takes no arguments (%d given)' % - (f_name, num_total)) - for arg in args: - if isinstance(arg, str) and arg in named: - if is_assigned(arg): - raise TypeError("%s() got multiple values for keyword " - "argument '%s'" % (f_name, arg)) - else: - assign(arg, named.pop(arg)) - if defaults: # fill in any missing values with the defaults - for arg, value in zip(args[-num_defaults:], defaults): - if not is_assigned(arg): - assign(arg, value) - if varkw: - assign(varkw, 
named) - elif named: - unexpected = next(iter(named)) - try: - unicode - except NameError: - pass - else: - if isinstance(unexpected, unicode): - unexpected = unexpected.encode(sys.getdefaultencoding(), 'replace') - raise TypeError("%s() got an unexpected keyword argument '%s'" % - (f_name, unexpected)) - unassigned = num_args - len([arg for arg in args if is_assigned(arg)]) - if unassigned: - num_required = num_args - num_defaults - raise TypeError('%s() takes %s %d %s (%d given)' % ( - f_name, 'at least' if defaults else 'exactly', num_required, - 'arguments' if num_required > 1 else 'argument', num_total)) - return arg2value - -# -------------------------------------------------- stack frame extraction - -Traceback = namedtuple('Traceback', 'filename lineno function code_context index') - -def getframeinfo(frame, context=1): - """Get information about a frame or traceback object. - - A tuple of five things is returned: the filename, the line number of - the current line, the function name, a list of lines of context from - the source code, and the index of the current line within that list. 
- The optional second argument specifies the number of lines of context - to return, which are centered around the current line.""" - if istraceback(frame): - lineno = frame.tb_lineno - frame = frame.tb_frame - else: - lineno = frame.f_lineno - if not isframe(frame): - raise TypeError('{!r} is not a frame or traceback object'.format(frame)) - - filename = getsourcefile(frame) or getfile(frame) - if context > 0: - start = lineno - 1 - context//2 - try: - lines, lnum = findsource(frame) - except IOError: - lines = index = None - else: - start = max(start, 1) - start = max(0, min(start, len(lines) - context)) - lines = lines[start:start+context] - index = lineno - 1 - start - else: - lines = index = None - - return Traceback(filename, lineno, frame.f_code.co_name, lines, index) - -def getlineno(frame): - """Get the line number from a frame object, allowing for optimization.""" - # FrameType.f_lineno is now a descriptor that grovels co_lnotab - return frame.f_lineno - -def getouterframes(frame, context=1): - """Get a list of records for a frame and all higher (calling) frames. - - Each record contains a frame object, filename, line number, function - name, a list of lines of context, and index within the context.""" - framelist = [] - while frame: - framelist.append((frame,) + getframeinfo(frame, context)) - frame = frame.f_back - return framelist - -def getinnerframes(tb, context=1): - """Get a list of records for a traceback's frame and all lower frames. 
- - Each record contains a frame object, filename, line number, function - name, a list of lines of context, and index within the context.""" - framelist = [] - while tb: - framelist.append((tb.tb_frame,) + getframeinfo(tb, context)) - tb = tb.tb_next - return framelist - -if hasattr(sys, '_getframe'): - currentframe = sys._getframe -else: - currentframe = lambda _=None: None - -def stack(context=1): - """Return a list of records for the stack above the caller's frame.""" - return getouterframes(sys._getframe(1), context) - -def trace(context=1): - """Return a list of records for the stack below the current exception.""" - return getinnerframes(sys.exc_info()[2], context) diff --git a/python/Lib/io.py b/python/Lib/io.py deleted file mode 100755 index cd68719e14..0000000000 --- a/python/Lib/io.py +++ /dev/null @@ -1,90 +0,0 @@ -"""The io module provides the Python interfaces to stream handling. The -builtin open function is defined in this module. - -At the top of the I/O hierarchy is the abstract base class IOBase. It -defines the basic interface to a stream. Note, however, that there is no -separation between reading and writing to streams; implementations are -allowed to raise an IOError if they do not support a given operation. - -Extending IOBase is RawIOBase which deals simply with the reading and -writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide -an interface to OS files. - -BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its -subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer -streams that are readable, writable, and both respectively. -BufferedRandom provides a buffered interface to random access -streams. BytesIO is a simple stream of in-memory bytes. - -Another IOBase subclass, TextIOBase, deals with the encoding and decoding -of streams into text. TextIOWrapper, which extends it, is a buffered text -interface to a buffered raw stream (`BufferedIOBase`). 
Finally, StringIO -is an in-memory stream for text. - -Argument names are not part of the specification, and only the arguments -of open() are intended to be used as keyword arguments. - -data: - -DEFAULT_BUFFER_SIZE - - An int containing the default buffer size used by the module's buffered - I/O classes. open() uses the file's blksize (as obtained by os.stat) if - possible. -""" -# New I/O library conforming to PEP 3116. - -__author__ = ("Guido van Rossum , " - "Mike Verdone , " - "Mark Russell , " - "Antoine Pitrou , " - "Amaury Forgeot d'Arc , " - "Benjamin Peterson ") - -__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO", - "BytesIO", "StringIO", "BufferedIOBase", - "BufferedReader", "BufferedWriter", "BufferedRWPair", - "BufferedRandom", "TextIOBase", "TextIOWrapper", - "UnsupportedOperation", "SEEK_SET", "SEEK_CUR", "SEEK_END"] - - -import _io -import abc - -from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, - open, FileIO, BytesIO, StringIO, BufferedReader, - BufferedWriter, BufferedRWPair, BufferedRandom, - IncrementalNewlineDecoder, TextIOWrapper) - -OpenWrapper = _io.open # for compatibility with _pyio - -# for seek() -SEEK_SET = 0 -SEEK_CUR = 1 -SEEK_END = 2 - -# Declaring ABCs in C is tricky so we do it here. -# Method descriptions and default implementations are inherited from the C -# version however. 
-class IOBase(_io._IOBase): - __metaclass__ = abc.ABCMeta - __doc__ = _io._IOBase.__doc__ - -class RawIOBase(_io._RawIOBase, IOBase): - __doc__ = _io._RawIOBase.__doc__ - -class BufferedIOBase(_io._BufferedIOBase, IOBase): - __doc__ = _io._BufferedIOBase.__doc__ - -class TextIOBase(_io._TextIOBase, IOBase): - __doc__ = _io._TextIOBase.__doc__ - -RawIOBase.register(FileIO) - -for klass in (BytesIO, BufferedReader, BufferedWriter, BufferedRandom, - BufferedRWPair): - BufferedIOBase.register(klass) - -for klass in (StringIO, TextIOWrapper): - TextIOBase.register(klass) -del klass diff --git a/python/Lib/json/__init__.py b/python/Lib/json/__init__.py deleted file mode 100755 index 324fbca99b..0000000000 --- a/python/Lib/json/__init__.py +++ /dev/null @@ -1,352 +0,0 @@ -r"""JSON (JavaScript Object Notation) is a subset of -JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data -interchange format. - -:mod:`json` exposes an API familiar to users of the standard library -:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained -version of the :mod:`json` library contained in Python 2.6, but maintains -compatibility with Python 2.4 and Python 2.5 and (currently) has -significant performance advantages, even without using the optional C -extension for speedups. 
- -Encoding basic Python object hierarchies:: - - >>> import json - >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) - '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") - "\"foo\bar" - >>> print json.dumps(u'\u1234') - "\u1234" - >>> print json.dumps('\\') - "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) - {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO - >>> io = StringIO() - >>> json.dump(['streaming API'], io) - >>> io.getvalue() - '["streaming API"]' - -Compact encoding:: - - >>> import json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], sort_keys=True, separators=(',',':')) - '[1,2,3,{"4":5,"6":7}]' - -Pretty printing:: - - >>> import json - >>> print json.dumps({'4': 5, '6': 7}, sort_keys=True, - ... indent=4, separators=(',', ': ')) - { - "4": 5, - "6": 7 - } - -Decoding JSON:: - - >>> import json - >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] - >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj - True - >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' - True - >>> from StringIO import StringIO - >>> io = StringIO('["streaming API"]') - >>> json.load(io)[0] == 'streaming API' - True - -Specializing JSON object decoding:: - - >>> import json - >>> def as_complex(dct): - ... if '__complex__' in dct: - ... return complex(dct['real'], dct['imag']) - ... return dct - ... - >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', - ... object_hook=as_complex) - (1+2j) - >>> from decimal import Decimal - >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') - True - -Specializing JSON object encoding:: - - >>> import json - >>> def encode_complex(obj): - ... if isinstance(obj, complex): - ... return [obj.real, obj.imag] - ... raise TypeError(repr(o) + " is not JSON serializable") - ... 
- >>> json.dumps(2 + 1j, default=encode_complex) - '[2.0, 1.0]' - >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) - '[2.0, 1.0]' - >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) - '[2.0, 1.0]' - - -Using json.tool from the shell to validate and pretty-print:: - - $ echo '{"json":"obj"}' | python -m json.tool - { - "json": "obj" - } - $ echo '{ 1.2:3.4}' | python -m json.tool - Expecting property name enclosed in double quotes: line 1 column 3 (char 2) -""" -__version__ = '2.0.9' -__all__ = [ - 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', -] - -__author__ = 'Bob Ippolito ' - -from .decoder import JSONDecoder -from .encoder import JSONEncoder - -_default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, -) - -def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, sort_keys=False, **kw): - """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a - ``.write()``-supporting file-like object). - - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is true (the default), all non-ASCII characters in the - output are escaped with ``\uXXXX`` sequences, and the result is a ``str`` - instance consisting of ASCII characters only. If ``ensure_ascii`` is - false, some chunks written to ``fp`` may be ``unicode`` instances. - This usually happens because the input contains unicode strings or the - ``encoding`` parameter is used. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter``) this is likely to - cause an error. 
- - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. Since the default item separator is ``', '``, the - output might include trailing whitespace when ``indent`` is specified. - You can use ``separators=(',', ': ')`` to avoid this. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *sort_keys* is true (default: ``False``), then the output of - dictionaries will be sorted by key. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. 
- - """ - # cached encoder - if (not skipkeys and ensure_ascii and - check_circular and allow_nan and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not sort_keys and not kw): - iterable = _default_encoder.iterencode(obj) - else: - if cls is None: - cls = JSONEncoder - iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, - default=default, sort_keys=sort_keys, **kw).iterencode(obj) - # could accelerate with writelines in some versions of Python, at - # a debuggability cost - for chunk in iterable: - fp.write(chunk) - - -def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, sort_keys=False, **kw): - """Serialize ``obj`` to a JSON formatted ``str``. - - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - - If ``ensure_ascii`` is false, all non-ASCII characters are not escaped, and - the return value may be a ``unicode`` instance. See ``dump`` for details. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. 
Since the default item separator is ``', '``, the - output might include trailing whitespace when ``indent`` is specified. - You can use ``separators=(',', ': ')`` to avoid this. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *sort_keys* is true (default: ``False``), then the output of - dictionaries will be sorted by key. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. - - """ - # cached encoder - if (not skipkeys and ensure_ascii and - check_circular and allow_nan and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not sort_keys and not kw): - return _default_encoder.encode(obj) - if cls is None: - cls = JSONEncoder - return cls( - skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, default=default, - sort_keys=sort_keys, **kw).encode(obj) - - -_default_decoder = JSONDecoder(encoding=None, object_hook=None, - object_pairs_hook=None) - - -def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): - """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. - - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. 
Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders that rely on the - order that the key and value pairs are decoded (for example, - collections.OrderedDict will remember the order of insertion). If - ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg; otherwise ``JSONDecoder`` is used. - - """ - return loads(fp.read(), - encoding=encoding, cls=cls, object_hook=object_hook, - parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, - **kw) - - -def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): - """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON - document) to a Python object. - - If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. 
- - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders that rely on the - order that the key and value pairs are decoded (for example, - collections.OrderedDict will remember the order of insertion). If - ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg; otherwise ``JSONDecoder`` is used. 
- - """ - if (cls is None and encoding is None and object_hook is None and - parse_int is None and parse_float is None and - parse_constant is None and object_pairs_hook is None and not kw): - return _default_decoder.decode(s) - if cls is None: - cls = JSONDecoder - if object_hook is not None: - kw['object_hook'] = object_hook - if object_pairs_hook is not None: - kw['object_pairs_hook'] = object_pairs_hook - if parse_float is not None: - kw['parse_float'] = parse_float - if parse_int is not None: - kw['parse_int'] = parse_int - if parse_constant is not None: - kw['parse_constant'] = parse_constant - return cls(encoding=encoding, **kw).decode(s) diff --git a/python/Lib/json/decoder.py b/python/Lib/json/decoder.py deleted file mode 100755 index 5141f879d9..0000000000 --- a/python/Lib/json/decoder.py +++ /dev/null @@ -1,383 +0,0 @@ -"""Implementation of JSONDecoder -""" -import re -import sys -import struct - -from json import scanner -try: - from _json import scanstring as c_scanstring -except ImportError: - c_scanstring = None - -__all__ = ['JSONDecoder'] - -FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL - -def _floatconstants(): - nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00') - inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00') - return nan, inf, -inf - -NaN, PosInf, NegInf = _floatconstants() - - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos + 1 - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _json - lineno, colno = linecol(doc, pos) - if end is None: - fmt = '{0}: line {1} column {2} (char {3})' - return fmt.format(msg, lineno, colno, pos) - #fmt = '%s: line %d column %d (char %d)' - #return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - return fmt.format(msg, lineno, colno, 
endlineno, endcolno, pos, end) - #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' - #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - -_CONSTANTS = { - '-Infinity': NegInf, - 'Infinity': PosInf, - 'NaN': NaN, -} - -STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) -BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', -} - -DEFAULT_ENCODING = "utf-8" - -def _decode_uXXXX(s, pos): - esc = s[pos + 1:pos + 5] - if len(esc) == 4 and esc[1] not in 'xX': - try: - return int(esc, 16) - except ValueError: - pass - msg = "Invalid \\uXXXX escape" - raise ValueError(errmsg(msg, s, pos)) - -def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match): - """Scan the string s for a JSON string. End is the index of the - character in s after the quote that started the JSON string. - Unescapes all valid JSON string escape sequences and raises ValueError - on attempt to decode an invalid string. If strict is False then literal - control characters are allowed in the string. 
- - Returns a tuple of the decoded string and the index of the character in s - after the end quote.""" - if encoding is None: - encoding = DEFAULT_ENCODING - chunks = [] - _append = chunks.append - begin = end - 1 - while 1: - chunk = _m(s, end) - if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) - end = chunk.end() - content, terminator = chunk.groups() - # Content is contains zero or more unescaped string characters - if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) - _append(content) - # Terminator is the end of string, a literal control character, - # or a backslash denoting that an escape sequence follows - if terminator == '"': - break - elif terminator != '\\': - if strict: - #msg = "Invalid control character %r at" % (terminator,) - msg = "Invalid control character {0!r} at".format(terminator) - raise ValueError(errmsg(msg, s, end)) - else: - _append(terminator) - continue - try: - esc = s[end] - except IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) - # If not a unicode escape sequence, must be in the lookup table - if esc != 'u': - try: - char = _b[esc] - except KeyError: - msg = "Invalid \\escape: " + repr(esc) - raise ValueError(errmsg(msg, s, end)) - end += 1 - else: - # Unicode escape sequence - uni = _decode_uXXXX(s, end) - end += 5 - # Check for surrogate pair on UCS-4 systems - if sys.maxunicode > 65535 and \ - 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': - uni2 = _decode_uXXXX(s, end + 1) - if 0xdc00 <= uni2 <= 0xdfff: - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - end += 6 - char = unichr(uni) - # Append the unescaped character - _append(char) - return u''.join(chunks), end - - -# Use speedup if available -scanstring = c_scanstring or py_scanstring - -WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) -WHITESPACE_STR = ' \t\n\r' - -def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, - 
object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - s, end = s_and_end - pairs = [] - pairs_append = pairs.append - # Use a slice to prevent IndexError from being raised, the following - # check will raise a more specific ValueError if the string is empty - nextchar = s[end:end + 1] - # Normally we expect nextchar == '"' - if nextchar != '"': - if nextchar in _ws: - end = _w(s, end).end() - nextchar = s[end:end + 1] - # Trivial empty object - if nextchar == '}': - if object_pairs_hook is not None: - result = object_pairs_hook(pairs) - return result, end + 1 - pairs = {} - if object_hook is not None: - pairs = object_hook(pairs) - return pairs, end + 1 - elif nextchar != '"': - raise ValueError(errmsg( - "Expecting property name enclosed in double quotes", s, end)) - end += 1 - while True: - key, end = scanstring(s, end, encoding, strict) - - # To skip some function call overhead we optimize the fast paths where - # the JSON key separator is ": " or just ":". - if s[end:end + 1] != ':': - end = _w(s, end).end() - if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting ':' delimiter", s, end)) - end += 1 - - try: - if s[end] in _ws: - end += 1 - if s[end] in _ws: - end = _w(s, end + 1).end() - except IndexError: - pass - - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - pairs_append((key, value)) - - try: - nextchar = s[end] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end] - except IndexError: - nextchar = '' - end += 1 - - if nextchar == '}': - break - elif nextchar != ',': - raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) - - try: - nextchar = s[end] - if nextchar in _ws: - end += 1 - nextchar = s[end] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end] - except IndexError: - nextchar = '' - - end += 1 - if nextchar != '"': - raise ValueError(errmsg( - "Expecting property name enclosed in double quotes", s, end - 1)) - if 
object_pairs_hook is not None: - result = object_pairs_hook(pairs) - return result, end - pairs = dict(pairs) - if object_hook is not None: - pairs = object_hook(pairs) - return pairs, end - -def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - s, end = s_and_end - values = [] - nextchar = s[end:end + 1] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end:end + 1] - # Look-ahead for trivial empty array - if nextchar == ']': - return values, end + 1 - _append = values.append - while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - _append(value) - nextchar = s[end:end + 1] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end:end + 1] - end += 1 - if nextchar == ']': - break - elif nextchar != ',': - raise ValueError(errmsg("Expecting ',' delimiter", s, end)) - try: - if s[end] in _ws: - end += 1 - if s[end] in _ws: - end = _w(s, end + 1).end() - except IndexError: - pass - - return values, end - -class JSONDecoder(object): - """Simple JSON decoder - - Performs the following translations in decoding by default: - - +---------------+-------------------+ - | JSON | Python | - +===============+===================+ - | object | dict | - +---------------+-------------------+ - | array | list | - +---------------+-------------------+ - | string | unicode | - +---------------+-------------------+ - | number (int) | int, long | - +---------------+-------------------+ - | number (real) | float | - +---------------+-------------------+ - | true | True | - +---------------+-------------------+ - | false | False | - +---------------+-------------------+ - | null | None | - +---------------+-------------------+ - - It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as - their corresponding ``float`` values, which is outside the JSON spec. 
- - """ - - def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True, - object_pairs_hook=None): - """``encoding`` determines the encoding used to interpret any ``str`` - objects decoded by this instance (utf-8 by default). It has no - effect when decoding ``unicode`` objects. - - Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as ``unicode``. - - ``object_hook``, if specified, will be called with the result - of every JSON object decoded and its return value will be used in - place of the given ``dict``. This can be used to provide custom - deserializations (e.g. to support JSON-RPC class hinting). - - ``object_pairs_hook``, if specified will be called with the result of - every JSON object decoded with an ordered list of pairs. The return - value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders that rely on the - order that the key and value pairs are decoded (for example, - collections.OrderedDict will remember the order of insertion). If - ``object_hook`` is also defined, the ``object_pairs_hook`` takes - priority. - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. 
- - If ``strict`` is false (true is the default), then control - characters will be allowed inside strings. Control characters in - this context are those with character codes in the 0-31 range, - including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. - - """ - self.encoding = encoding - self.object_hook = object_hook - self.object_pairs_hook = object_pairs_hook - self.parse_float = parse_float or float - self.parse_int = parse_int or int - self.parse_constant = parse_constant or _CONSTANTS.__getitem__ - self.strict = strict - self.parse_object = JSONObject - self.parse_array = JSONArray - self.parse_string = scanstring - self.scan_once = scanner.make_scanner(self) - - def decode(self, s, _w=WHITESPACE.match): - """Return the Python representation of ``s`` (a ``str`` or ``unicode`` - instance containing a JSON document) - - """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) - end = _w(s, end).end() - if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) - return obj - - def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` - beginning with a JSON document) and return a 2-tuple of the Python - representation and the index in ``s`` where the document ended. - - This can be used to decode a JSON document from a string that may - have extraneous data at the end. 
- - """ - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise ValueError("No JSON object could be decoded") - return obj, end diff --git a/python/Lib/json/encoder.py b/python/Lib/json/encoder.py deleted file mode 100755 index 2414f0a7f5..0000000000 --- a/python/Lib/json/encoder.py +++ /dev/null @@ -1,448 +0,0 @@ -"""Implementation of JSONEncoder -""" -import re - -try: - from _json import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from _json import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None - -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') -ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') -HAS_UTF8 = re.compile(r'[\x80-\xff]') -ESCAPE_DCT = { - '\\': '\\\\', - '"': '\\"', - '\b': '\\b', - '\f': '\\f', - '\n': '\\n', - '\r': '\\r', - '\t': '\\t', -} -for i in range(0x20): - ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) - #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) - -INFINITY = float('inf') -FLOAT_REPR = float.__repr__ - -def encode_basestring(s): - """Return a JSON representation of a Python string - - """ - def replace(match): - return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' - - -def py_encode_basestring_ascii(s): - """Return an ASCII-only JSON representation of a Python string - - """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - def replace(match): - s = match.group(0) - try: - return ESCAPE_DCT[s] - except KeyError: - n = ord(s) - if n < 0x10000: - return '\\u{0:04x}'.format(n) - #return '\\u%04x' % (n,) - else: - # surrogate pair - n -= 0x10000 - s1 = 0xd800 | ((n >> 10) & 0x3ff) - s2 = 0xdc00 | (n & 0x3ff) - return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) - #return '\\u%04x\\u%04x' % (s1, s2) - return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' - - -encode_basestring_ascii = ( - c_encode_basestring_ascii or py_encode_basestring_ascii) - -class 
JSONEncoder(object): - """Extensible JSON encoder for Python data structures. - - Supports the following objects and types by default: - - +-------------------+---------------+ - | Python | JSON | - +===================+===============+ - | dict | object | - +-------------------+---------------+ - | list, tuple | array | - +-------------------+---------------+ - | str, unicode | string | - +-------------------+---------------+ - | int, long, float | number | - +-------------------+---------------+ - | True | true | - +-------------------+---------------+ - | False | false | - +-------------------+---------------+ - | None | null | - +-------------------+---------------+ - - To extend this to recognize other objects, subclass and implement a - ``.default()`` method with another method that returns a serializable - object for ``o`` if possible, otherwise it should call the superclass - implementation (to raise ``TypeError``). - - """ - item_separator = ', ' - key_separator = ': ' - def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): - """Constructor for JSONEncoder, with sensible defaults. - - If skipkeys is false, then it is a TypeError to attempt - encoding of keys that are not str, int, long, float or None. If - skipkeys is True, such items are simply skipped. - - If *ensure_ascii* is true (the default), all non-ASCII - characters in the output are escaped with \uXXXX sequences, - and the results are str instances consisting of ASCII - characters only. If ensure_ascii is False, a result may be a - unicode instance. This usually happens if the input contains - unicode strings or the *encoding* parameter is used. - - If check_circular is true, then lists, dicts, and custom encoded - objects will be checked for circular references during encoding to - prevent an infinite recursion (which would cause an OverflowError). 
- Otherwise, no such check takes place. - - If allow_nan is true, then NaN, Infinity, and -Infinity will be - encoded as such. This behavior is not JSON specification compliant, - but is consistent with most JavaScript based encoders and decoders. - Otherwise, it will be a ValueError to encode such floats. - - If sort_keys is true, then the output of dictionaries will be - sorted by key; this is useful for regression tests to ensure - that JSON serializations can be compared on a day-to-day basis. - - If indent is a non-negative integer, then JSON array - elements and object members will be pretty-printed with that - indent level. An indent level of 0 will only insert newlines. - None is the most compact representation. Since the default - item separator is ', ', the output might include trailing - whitespace when indent is specified. You can use - separators=(',', ': ') to avoid this. - - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. - - If specified, default is a function that gets called for objects - that can't otherwise be serialized. It should return a JSON encodable - version of the object or raise a ``TypeError``. - - If encoding is not None, then all input strings will be - transformed into unicode using that encoding prior to JSON-encoding. - The default is UTF-8. - - """ - - self.skipkeys = skipkeys - self.ensure_ascii = ensure_ascii - self.check_circular = check_circular - self.allow_nan = allow_nan - self.sort_keys = sort_keys - self.indent = indent - if separators is not None: - self.item_separator, self.key_separator = separators - if default is not None: - self.default = default - self.encoding = encoding - - def default(self, o): - """Implement this method in a subclass such that it returns - a serializable object for ``o``, or calls the base implementation - (to raise a ``TypeError``). 
- - For example, to support arbitrary iterators, you could - implement default like this:: - - def default(self, o): - try: - iterable = iter(o) - except TypeError: - pass - else: - return list(iterable) - # Let the base class default method raise the TypeError - return JSONEncoder.default(self, o) - - """ - raise TypeError(repr(o) + " is not JSON serializable") - - def encode(self, o): - """Return a JSON string representation of a Python data structure. - - >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) - '{"foo": ["bar", "baz"]}' - - """ - # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) - if self.ensure_ascii: - return encode_basestring_ascii(o) - else: - return encode_basestring(o) - # This doesn't pass the iterator directly to ''.join() because the - # exceptions aren't as detailed. The list call should be roughly - # equivalent to the PySequence_Fast that ''.join() would do. - chunks = self.iterencode(o, _one_shot=True) - if not isinstance(chunks, (list, tuple)): - chunks = list(chunks) - return ''.join(chunks) - - def iterencode(self, o, _one_shot=False): - """Encode the given object and yield each string - representation as available. - - For example:: - - for chunk in JSONEncoder().iterencode(bigobject): - mysocket.write(chunk) - - """ - if self.check_circular: - markers = {} - else: - markers = None - if self.ensure_ascii: - _encoder = encode_basestring_ascii - else: - _encoder = encode_basestring - if self.encoding != 'utf-8': - def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): - o = o.decode(_encoding) - return _orig_encoder(o) - - def floatstr(o, allow_nan=self.allow_nan, - _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. 
Note that this type of test is processor - # and/or platform-specific, so do tests which don't depend on the - # internals. - - if o != o: - text = 'NaN' - elif o == _inf: - text = 'Infinity' - elif o == _neginf: - text = '-Infinity' - else: - return _repr(o) - - if not allow_nan: - raise ValueError( - "Out of range float values are not JSON compliant: " + - repr(o)) - - return text - - - if (_one_shot and c_make_encoder is not None - and self.indent is None and not self.sort_keys): - _iterencode = c_make_encoder( - markers, self.default, _encoder, self.indent, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) - else: - _iterencode = _make_iterencode( - markers, self.default, _encoder, self.indent, floatstr, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) - -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, - _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, - ## HACK: hand-optimized bytecode; turn globals into locals - ValueError=ValueError, - basestring=basestring, - dict=dict, - float=float, - id=id, - int=int, - isinstance=isinstance, - list=list, - long=long, - str=str, - tuple=tuple, - ): - - def _iterencode_list(lst, _current_indent_level): - if not lst: - yield '[]' - return - if markers is not None: - markerid = id(lst) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = lst - buf = '[' - if _indent is not None: - _current_indent_level += 1 - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + newline_indent - buf += newline_indent - else: - newline_indent = None - separator = _item_separator - first = True - for value in lst: - if first: - first = False - else: - buf = separator - if isinstance(value, basestring): - yield buf + _encoder(value) - elif value is None: - yield buf + 'null' - elif value is True: - yield buf + 
'true' - elif value is False: - yield buf + 'false' - elif isinstance(value, (int, long)): - yield buf + str(value) - elif isinstance(value, float): - yield buf + _floatstr(value) - else: - yield buf - if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) - else: - chunks = _iterencode(value, _current_indent_level) - for chunk in chunks: - yield chunk - if newline_indent is not None: - _current_indent_level -= 1 - yield '\n' + (' ' * (_indent * _current_indent_level)) - yield ']' - if markers is not None: - del markers[markerid] - - def _iterencode_dict(dct, _current_indent_level): - if not dct: - yield '{}' - return - if markers is not None: - markerid = id(dct) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = dct - yield '{' - if _indent is not None: - _current_indent_level += 1 - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - item_separator = _item_separator + newline_indent - yield newline_indent - else: - newline_indent = None - item_separator = _item_separator - first = True - if _sort_keys: - items = sorted(dct.items(), key=lambda kv: kv[0]) - else: - items = dct.iteritems() - for key, value in items: - if isinstance(key, basestring): - pass - # JavaScript is weakly typed for these, so it makes sense to - # also allow them. Many encoders seem to do something like this. 
- elif isinstance(key, float): - key = _floatstr(key) - elif key is True: - key = 'true' - elif key is False: - key = 'false' - elif key is None: - key = 'null' - elif isinstance(key, (int, long)): - key = str(key) - elif _skipkeys: - continue - else: - raise TypeError("key " + repr(key) + " is not a string") - if first: - first = False - else: - yield item_separator - yield _encoder(key) - yield _key_separator - if isinstance(value, basestring): - yield _encoder(value) - elif value is None: - yield 'null' - elif value is True: - yield 'true' - elif value is False: - yield 'false' - elif isinstance(value, (int, long)): - yield str(value) - elif isinstance(value, float): - yield _floatstr(value) - else: - if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) - else: - chunks = _iterencode(value, _current_indent_level) - for chunk in chunks: - yield chunk - if newline_indent is not None: - _current_indent_level -= 1 - yield '\n' + (' ' * (_indent * _current_indent_level)) - yield '}' - if markers is not None: - del markers[markerid] - - def _iterencode(o, _current_indent_level): - if isinstance(o, basestring): - yield _encoder(o) - elif o is None: - yield 'null' - elif o is True: - yield 'true' - elif o is False: - yield 'false' - elif isinstance(o, (int, long)): - yield str(o) - elif isinstance(o, float): - yield _floatstr(o) - elif isinstance(o, (list, tuple)): - for chunk in _iterencode_list(o, _current_indent_level): - yield chunk - elif isinstance(o, dict): - for chunk in _iterencode_dict(o, _current_indent_level): - yield chunk - else: - if markers is not None: - markerid = id(o) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = o - o = _default(o) - for chunk in _iterencode(o, _current_indent_level): - yield chunk - if markers is not None: - del markers[markerid] - - return 
_iterencode diff --git a/python/Lib/json/scanner.py b/python/Lib/json/scanner.py deleted file mode 100755 index 74e6805155..0000000000 --- a/python/Lib/json/scanner.py +++ /dev/null @@ -1,67 +0,0 @@ -"""JSON token scanner -""" -import re -try: - from _json import make_scanner as c_make_scanner -except ImportError: - c_make_scanner = None - -__all__ = ['make_scanner'] - -NUMBER_RE = re.compile( - r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', - (re.VERBOSE | re.MULTILINE | re.DOTALL)) - -def py_make_scanner(context): - parse_object = context.parse_object - parse_array = context.parse_array - parse_string = context.parse_string - match_number = NUMBER_RE.match - encoding = context.encoding - strict = context.strict - parse_float = context.parse_float - parse_int = context.parse_int - parse_constant = context.parse_constant - object_hook = context.object_hook - object_pairs_hook = context.object_pairs_hook - - def _scan_once(string, idx): - try: - nextchar = string[idx] - except IndexError: - raise StopIteration - - if nextchar == '"': - return parse_string(string, idx + 1, encoding, strict) - elif nextchar == '{': - return parse_object((string, idx + 1), encoding, strict, - _scan_once, object_hook, object_pairs_hook) - elif nextchar == '[': - return parse_array((string, idx + 1), _scan_once) - elif nextchar == 'n' and string[idx:idx + 4] == 'null': - return None, idx + 4 - elif nextchar == 't' and string[idx:idx + 4] == 'true': - return True, idx + 4 - elif nextchar == 'f' and string[idx:idx + 5] == 'false': - return False, idx + 5 - - m = match_number(string, idx) - if m is not None: - integer, frac, exp = m.groups() - if frac or exp: - res = parse_float(integer + (frac or '') + (exp or '')) - else: - res = parse_int(integer) - return res, m.end() - elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': - return parse_constant('NaN'), idx + 3 - elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': - return parse_constant('Infinity'), idx + 8 - elif nextchar == 
'-' and string[idx:idx + 9] == '-Infinity': - return parse_constant('-Infinity'), idx + 9 - else: - raise StopIteration - - return _scan_once - -make_scanner = c_make_scanner or py_make_scanner diff --git a/python/Lib/json/tool.py b/python/Lib/json/tool.py deleted file mode 100755 index fc5d74923d..0000000000 --- a/python/Lib/json/tool.py +++ /dev/null @@ -1,40 +0,0 @@ -r"""Command-line tool to validate and pretty-print JSON - -Usage:: - - $ echo '{"json":"obj"}' | python -m json.tool - { - "json": "obj" - } - $ echo '{ 1.2:3.4}' | python -m json.tool - Expecting property name enclosed in double quotes: line 1 column 3 (char 2) - -""" -import sys -import json - -def main(): - if len(sys.argv) == 1: - infile = sys.stdin - outfile = sys.stdout - elif len(sys.argv) == 2: - infile = open(sys.argv[1], 'rb') - outfile = sys.stdout - elif len(sys.argv) == 3: - infile = open(sys.argv[1], 'rb') - outfile = open(sys.argv[2], 'wb') - else: - raise SystemExit(sys.argv[0] + " [infile [outfile]]") - with infile: - try: - obj = json.load(infile) - except ValueError, e: - raise SystemExit(e) - with outfile: - json.dump(obj, outfile, sort_keys=True, - indent=4, separators=(',', ': ')) - outfile.write('\n') - - -if __name__ == '__main__': - main() diff --git a/python/Lib/keyword.py b/python/Lib/keyword.py deleted file mode 100755 index 69794bda8c..0000000000 --- a/python/Lib/keyword.py +++ /dev/null @@ -1,93 +0,0 @@ -#! /usr/bin/env python - -"""Keywords (from "graminit.c") - -This file is automatically generated; please don't muck it up! 
- -To update the symbols in this file, 'cd' to the top directory of -the python source tree after building the interpreter and run: - - ./python Lib/keyword.py -""" - -__all__ = ["iskeyword", "kwlist"] - -kwlist = [ -#--start keywords-- - 'and', - 'as', - 'assert', - 'break', - 'class', - 'continue', - 'def', - 'del', - 'elif', - 'else', - 'except', - 'exec', - 'finally', - 'for', - 'from', - 'global', - 'if', - 'import', - 'in', - 'is', - 'lambda', - 'not', - 'or', - 'pass', - 'print', - 'raise', - 'return', - 'try', - 'while', - 'with', - 'yield', -#--end keywords-- - ] - -iskeyword = frozenset(kwlist).__contains__ - -def main(): - import sys, re - - args = sys.argv[1:] - iptfile = args and args[0] or "Python/graminit.c" - if len(args) > 1: optfile = args[1] - else: optfile = "Lib/keyword.py" - - # scan the source file for keywords - fp = open(iptfile) - strprog = re.compile('"([^"]+)"') - lines = [] - for line in fp: - if '{1, "' in line: - match = strprog.search(line) - if match: - lines.append(" '" + match.group(1) + "',\n") - fp.close() - lines.sort() - - # load the output skeleton from the target - fp = open(optfile) - format = fp.readlines() - fp.close() - - # insert the lines of keywords - try: - start = format.index("#--start keywords--\n") + 1 - end = format.index("#--end keywords--\n") - format[start:end] = lines - except ValueError: - sys.stderr.write("target does not contain format markers\n") - sys.exit(1) - - # write the output file - fp = open(optfile, 'w') - fp.write(''.join(format)) - fp.close() - -if __name__ == "__main__": - main() diff --git a/python/Lib/linecache.py b/python/Lib/linecache.py deleted file mode 100755 index 4b97be3f05..0000000000 --- a/python/Lib/linecache.py +++ /dev/null @@ -1,139 +0,0 @@ -"""Cache lines from files. - -This is intended to read lines from modules imported -- hence if a filename -is not found, it will look down the module search path for a file by -that name. 
-""" - -import sys -import os - -__all__ = ["getline", "clearcache", "checkcache"] - -def getline(filename, lineno, module_globals=None): - lines = getlines(filename, module_globals) - if 1 <= lineno <= len(lines): - return lines[lineno-1] - else: - return '' - - -# The cache - -cache = {} # The cache - - -def clearcache(): - """Clear the cache entirely.""" - - global cache - cache = {} - - -def getlines(filename, module_globals=None): - """Get the lines for a file from the cache. - Update the cache if it doesn't contain an entry for this file already.""" - - if filename in cache: - return cache[filename][2] - - try: - return updatecache(filename, module_globals) - except MemoryError: - clearcache() - return [] - - -def checkcache(filename=None): - """Discard cache entries that are out of date. - (This is not checked upon each call!)""" - - if filename is None: - filenames = cache.keys() - else: - if filename in cache: - filenames = [filename] - else: - return - - for filename in filenames: - size, mtime, lines, fullname = cache[filename] - if mtime is None: - continue # no-op for files loaded via a __loader__ - try: - stat = os.stat(fullname) - except os.error: - del cache[filename] - continue - if size != stat.st_size or mtime != stat.st_mtime: - del cache[filename] - - -def updatecache(filename, module_globals=None): - """Update a cache entry and return its list of lines. 
- If something's wrong, print a message, discard the cache entry, - and return an empty list.""" - - if filename in cache: - del cache[filename] - if not filename or (filename.startswith('<') and filename.endswith('>')): - return [] - - fullname = filename - try: - stat = os.stat(fullname) - except OSError: - basename = filename - - # Try for a __loader__, if available - if module_globals and '__loader__' in module_globals: - name = module_globals.get('__name__') - loader = module_globals['__loader__'] - get_source = getattr(loader, 'get_source', None) - - if name and get_source: - try: - data = get_source(name) - except (ImportError, IOError): - pass - else: - if data is None: - # No luck, the PEP302 loader cannot find the source - # for this module. - return [] - cache[filename] = ( - len(data), None, - [line+'\n' for line in data.splitlines()], fullname - ) - return cache[filename][2] - - # Try looking through the module search path, which is only useful - # when handling a relative filename. - if os.path.isabs(filename): - return [] - - for dirname in sys.path: - # When using imputil, sys.path may contain things other than - # strings; ignore them when it happens. - try: - fullname = os.path.join(dirname, basename) - except (TypeError, AttributeError): - # Not sufficiently string-like to do anything useful with. - continue - try: - stat = os.stat(fullname) - break - except os.error: - pass - else: - return [] - try: - with open(fullname, 'rU') as fp: - lines = fp.readlines() - except IOError: - return [] - if lines and not lines[-1].endswith('\n'): - lines[-1] += '\n' - size, mtime = stat.st_size, stat.st_mtime - cache[filename] = size, mtime, lines, fullname - return lines diff --git a/python/Lib/locale.py b/python/Lib/locale.py deleted file mode 100755 index 5aab163e5a..0000000000 --- a/python/Lib/locale.py +++ /dev/null @@ -1,2064 +0,0 @@ -"""Locale support module. 
- -The module provides low-level access to the C lib's locale APIs and adds high -level number formatting APIs as well as a locale aliasing engine to complement -these. - -The aliasing engine includes support for many commonly used locale names and -maps them to values suitable for passing to the C lib's setlocale() function. It -also includes default encodings for all supported locale names. -""" - -import sys -import encodings -import encodings.aliases -import re -import operator -import functools - -# keep a copy of the builtin str type, because 'str' name is overridden -# in globals by a function below -_str = str - -try: - _unicode = unicode -except NameError: - # If Python is built without Unicode support, the unicode type - # will not exist. Fake one. - class _unicode(object): - pass - -# Try importing the _locale module. -# -# If this fails, fall back on a basic 'C' locale emulation. - -# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before -# trying the import. So __all__ is also fiddled at the end of the file. -__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error", - "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", - "str", "atof", "atoi", "format", "format_string", "currency", - "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", - "LC_NUMERIC", "LC_ALL", "CHAR_MAX"] - -try: - - from _locale import * - -except ImportError: - - # Locale emulation - - CHAR_MAX = 127 - LC_ALL = 6 - LC_COLLATE = 3 - LC_CTYPE = 0 - LC_MESSAGES = 5 - LC_MONETARY = 4 - LC_NUMERIC = 1 - LC_TIME = 2 - Error = ValueError - - def localeconv(): - """ localeconv() -> dict. - Returns numeric and monetary locale-specific parameters. 
- """ - # 'C' locale default values - return {'grouping': [127], - 'currency_symbol': '', - 'n_sign_posn': 127, - 'p_cs_precedes': 127, - 'n_cs_precedes': 127, - 'mon_grouping': [], - 'n_sep_by_space': 127, - 'decimal_point': '.', - 'negative_sign': '', - 'positive_sign': '', - 'p_sep_by_space': 127, - 'int_curr_symbol': '', - 'p_sign_posn': 127, - 'thousands_sep': '', - 'mon_thousands_sep': '', - 'frac_digits': 127, - 'mon_decimal_point': '', - 'int_frac_digits': 127} - - def setlocale(category, value=None): - """ setlocale(integer,string=None) -> string. - Activates/queries locale processing. - """ - if value not in (None, '', 'C'): - raise Error, '_locale emulation only supports "C" locale' - return 'C' - - def strcoll(a,b): - """ strcoll(string,string) -> int. - Compares two strings according to the locale. - """ - return cmp(a,b) - - def strxfrm(s): - """ strxfrm(string) -> string. - Returns a string that behaves for cmp locale-aware. - """ - return s - - -_localeconv = localeconv - -# With this dict, you can override some items of localeconv's return value. -# This is useful for testing purposes. 
-_override_localeconv = {} - -@functools.wraps(_localeconv) -def localeconv(): - d = _localeconv() - if _override_localeconv: - d.update(_override_localeconv) - return d - - -### Number formatting APIs - -# Author: Martin von Loewis -# improved by Georg Brandl - -# Iterate over grouping intervals -def _grouping_intervals(grouping): - last_interval = None - for interval in grouping: - # if grouping is -1, we are done - if interval == CHAR_MAX: - return - # 0: re-use last group ad infinitum - if interval == 0: - if last_interval is None: - raise ValueError("invalid grouping") - while True: - yield last_interval - yield interval - last_interval = interval - -#perform the grouping from right to left -def _group(s, monetary=False): - conv = localeconv() - thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep'] - grouping = conv[monetary and 'mon_grouping' or 'grouping'] - if not grouping: - return (s, 0) - if s[-1] == ' ': - stripped = s.rstrip() - right_spaces = s[len(stripped):] - s = stripped - else: - right_spaces = '' - left_spaces = '' - groups = [] - for interval in _grouping_intervals(grouping): - if not s or s[-1] not in "0123456789": - # only non-digit characters remain (sign, spaces) - left_spaces = s - s = '' - break - groups.append(s[-interval:]) - s = s[:-interval] - if s: - groups.append(s) - groups.reverse() - return ( - left_spaces + thousands_sep.join(groups) + right_spaces, - len(thousands_sep) * (len(groups) - 1) - ) - -# Strip a given amount of excess padding from the given string -def _strip_padding(s, amount): - lpos = 0 - while amount and s[lpos] == ' ': - lpos += 1 - amount -= 1 - rpos = len(s) - 1 - while amount and s[rpos] == ' ': - rpos -= 1 - amount -= 1 - return s[lpos:rpos+1] - -_percent_re = re.compile(r'%(?:\((?P.*?)\))?' - r'(?P[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]') - -def format(percent, value, grouping=False, monetary=False, *additional): - """Returns the locale-aware substitution of a %? specifier - (percent). 
- - additional is for format strings which contain one or more - '*' modifiers.""" - # this is only for one-percent-specifier strings and this should be checked - match = _percent_re.match(percent) - if not match or len(match.group())!= len(percent): - raise ValueError(("format() must be given exactly one %%char " - "format specifier, %s not valid") % repr(percent)) - return _format(percent, value, grouping, monetary, *additional) - -def _format(percent, value, grouping=False, monetary=False, *additional): - if additional: - formatted = percent % ((value,) + additional) - else: - formatted = percent % value - # floats and decimal ints need special action! - if percent[-1] in 'eEfFgG': - seps = 0 - parts = formatted.split('.') - if grouping: - parts[0], seps = _group(parts[0], monetary=monetary) - decimal_point = localeconv()[monetary and 'mon_decimal_point' - or 'decimal_point'] - formatted = decimal_point.join(parts) - if seps: - formatted = _strip_padding(formatted, seps) - elif percent[-1] in 'diu': - seps = 0 - if grouping: - formatted, seps = _group(formatted, monetary=monetary) - if seps: - formatted = _strip_padding(formatted, seps) - return formatted - -def format_string(f, val, grouping=False): - """Formats a string in the same way that the % formatting would use, - but takes the current locale into account. 
- Grouping is applied if the third parameter is true.""" - percents = list(_percent_re.finditer(f)) - new_f = _percent_re.sub('%s', f) - - if operator.isMappingType(val): - new_val = [] - for perc in percents: - if perc.group()[-1]=='%': - new_val.append('%') - else: - new_val.append(format(perc.group(), val, grouping)) - else: - if not isinstance(val, tuple): - val = (val,) - new_val = [] - i = 0 - for perc in percents: - if perc.group()[-1]=='%': - new_val.append('%') - else: - starcount = perc.group('modifiers').count('*') - new_val.append(_format(perc.group(), - val[i], - grouping, - False, - *val[i+1:i+1+starcount])) - i += (1 + starcount) - val = tuple(new_val) - - return new_f % val - -def currency(val, symbol=True, grouping=False, international=False): - """Formats val according to the currency settings - in the current locale.""" - conv = localeconv() - - # check for illegal values - digits = conv[international and 'int_frac_digits' or 'frac_digits'] - if digits == 127: - raise ValueError("Currency formatting is not possible using " - "the 'C' locale.") - - s = format('%%.%if' % digits, abs(val), grouping, monetary=True) - # '<' and '>' are markers if the sign must be inserted between symbol and value - s = '<' + s + '>' - - if symbol: - smb = conv[international and 'int_curr_symbol' or 'currency_symbol'] - precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes'] - separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space'] - - if precedes: - s = smb + (separated and ' ' or '') + s - else: - s = s + (separated and ' ' or '') + smb - - sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn'] - sign = conv[val<0 and 'negative_sign' or 'positive_sign'] - - if sign_pos == 0: - s = '(' + s + ')' - elif sign_pos == 1: - s = sign + s - elif sign_pos == 2: - s = s + sign - elif sign_pos == 3: - s = s.replace('<', sign) - elif sign_pos == 4: - s = s.replace('>', sign) - else: - # the default if nothing specified; - # this should be the most fitting sign 
position - s = sign + s - - return s.replace('<', '').replace('>', '') - -def str(val): - """Convert float to string, taking the locale into account.""" - return format("%.12g", val) - -def atof(string, func=float): - "Parses a string as a float according to the locale settings." - #First, get rid of the grouping - ts = localeconv()['thousands_sep'] - if ts: - string = string.replace(ts, '') - #next, replace the decimal point with a dot - dd = localeconv()['decimal_point'] - if dd: - string = string.replace(dd, '.') - #finally, parse the string - return func(string) - -def atoi(str): - "Converts a string to an integer according to the locale settings." - return atof(str, int) - -def _test(): - setlocale(LC_ALL, "") - #do grouping - s1 = format("%d", 123456789,1) - print s1, "is", atoi(s1) - #standard formatting - s1 = str(3.14) - print s1, "is", atof(s1) - -### Locale name aliasing engine - -# Author: Marc-Andre Lemburg, mal@lemburg.com -# Various tweaks by Fredrik Lundh - -# store away the low-level version of setlocale (it's -# overridden below) -_setlocale = setlocale - -# Avoid relying on the locale-dependent .lower() method -# (see issue #1813). -_ascii_lower_map = ''.join( - chr(x + 32 if x >= ord('A') and x <= ord('Z') else x) - for x in range(256) -) - -def _replace_encoding(code, encoding): - if '.' in code: - langname = code[:code.index('.')] - else: - langname = code - # Convert the encoding to a C lib compatible encoding string - norm_encoding = encodings.normalize_encoding(encoding) - #print('norm encoding: %r' % norm_encoding) - norm_encoding = encodings.aliases.aliases.get(norm_encoding, - norm_encoding) - #print('aliased encoding: %r' % norm_encoding) - encoding = locale_encoding_alias.get(norm_encoding, - norm_encoding) - #print('found encoding %r' % encoding) - return langname + '.' + encoding - -def normalize(localename): - - """ Returns a normalized locale code for the given locale - name. 
- - The returned locale code is formatted for use with - setlocale(). - - If normalization fails, the original name is returned - unchanged. - - If the given encoding is not known, the function defaults to - the default encoding for the locale code just like setlocale() - does. - - """ - # Normalize the locale name and extract the encoding and modifier - if isinstance(localename, _unicode): - localename = localename.encode('ascii') - code = localename.translate(_ascii_lower_map) - if ':' in code: - # ':' is sometimes used as encoding delimiter. - code = code.replace(':', '.') - if '@' in code: - code, modifier = code.split('@', 1) - else: - modifier = '' - if '.' in code: - langname, encoding = code.split('.')[:2] - else: - langname = code - encoding = '' - - # First lookup: fullname (possibly with encoding and modifier) - lang_enc = langname - if encoding: - norm_encoding = encoding.replace('-', '') - norm_encoding = norm_encoding.replace('_', '') - lang_enc += '.' + norm_encoding - lookup_name = lang_enc - if modifier: - lookup_name += '@' + modifier - code = locale_alias.get(lookup_name, None) - if code is not None: - return code - #print('first lookup failed') - - if modifier: - # Second try: fullname without modifier (possibly with encoding) - code = locale_alias.get(lang_enc, None) - if code is not None: - #print('lookup without modifier succeeded') - if '@' not in code: - return code + '@' + modifier - if code.split('@', 1)[1].translate(_ascii_lower_map) == modifier: - return code - #print('second lookup failed') - - if encoding: - # Third try: langname (without encoding, possibly with modifier) - lookup_name = langname - if modifier: - lookup_name += '@' + modifier - code = locale_alias.get(lookup_name, None) - if code is not None: - #print('lookup without encoding succeeded') - if '@' not in code: - return _replace_encoding(code, encoding) - code, modifier = code.split('@', 1) - return _replace_encoding(code, encoding) + '@' + modifier - - if modifier: - # 
Fourth try: langname (without encoding and modifier) - code = locale_alias.get(langname, None) - if code is not None: - #print('lookup without modifier and encoding succeeded') - if '@' not in code: - return _replace_encoding(code, encoding) + '@' + modifier - code, defmod = code.split('@', 1) - if defmod.translate(_ascii_lower_map) == modifier: - return _replace_encoding(code, encoding) + '@' + defmod - - return localename - -def _parse_localename(localename): - - """ Parses the locale code for localename and returns the - result as tuple (language code, encoding). - - The localename is normalized and passed through the locale - alias engine. A ValueError is raised in case the locale name - cannot be parsed. - - The language code corresponds to RFC 1766. code and encoding - can be None in case the values cannot be determined or are - unknown to this implementation. - - """ - code = normalize(localename) - if '@' in code: - # Deal with locale modifiers - code, modifier = code.split('@', 1) - if modifier == 'euro' and '.' not in code: - # Assume Latin-9 for @euro locales. This is bogus, - # since some systems may use other encodings for these - # locales. Also, we ignore other modifiers. - return code, 'iso-8859-15' - - if '.' in code: - return tuple(code.split('.')[:2]) - elif code == 'C': - return None, None - raise ValueError, 'unknown locale: %s' % localename - -def _build_localename(localetuple): - - """ Builds a locale code from the given tuple (language code, - encoding). - - No aliasing or normalizing takes place. - - """ - language, encoding = localetuple - if language is None: - language = 'C' - if encoding is None: - return language - else: - return language + '.' + encoding - -def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')): - - """ Tries to determine the default locale settings and returns - them as tuple (language code, encoding). 
- - According to POSIX, a program which has not called - setlocale(LC_ALL, "") runs using the portable 'C' locale. - Calling setlocale(LC_ALL, "") lets it use the default locale as - defined by the LANG variable. Since we don't want to interfere - with the current locale setting we thus emulate the behavior - in the way described above. - - To maintain compatibility with other platforms, not only the - LANG variable is tested, but a list of variables given as - envvars parameter. The first found to be defined will be - used. envvars defaults to the search path used in GNU gettext; - it must always contain the variable name 'LANG'. - - Except for the code 'C', the language code corresponds to RFC - 1766. code and encoding can be None in case the values cannot - be determined. - - """ - - try: - # check if it's supported by the _locale module - import _locale - code, encoding = _locale._getdefaultlocale() - except (ImportError, AttributeError): - pass - else: - # make sure the code/encoding values are valid - if sys.platform == "win32" and code and code[:2] == "0x": - # map windows language identifier to language name - code = windows_locale.get(int(code, 0)) - # ...add other platform-specific processing here, if - # necessary... - return code, encoding - - # fall back on POSIX behaviour - import os - lookup = os.environ.get - for variable in envvars: - localename = lookup(variable,None) - if localename: - if variable == 'LANGUAGE': - localename = localename.split(':')[0] - break - else: - localename = 'C' - return _parse_localename(localename) - - -def getlocale(category=LC_CTYPE): - - """ Returns the current setting for the given locale category as - tuple (language code, encoding). - - category may be one of the LC_* value except LC_ALL. It - defaults to LC_CTYPE. - - Except for the code 'C', the language code corresponds to RFC - 1766. code and encoding can be None in case the values cannot - be determined. 
- - """ - localename = _setlocale(category) - if category == LC_ALL and ';' in localename: - raise TypeError, 'category LC_ALL is not supported' - return _parse_localename(localename) - -def setlocale(category, locale=None): - - """ Set the locale for the given category. The locale can be - a string, an iterable of two strings (language code and encoding), - or None. - - Iterables are converted to strings using the locale aliasing - engine. Locale strings are passed directly to the C lib. - - category may be given as one of the LC_* values. - - """ - if locale and not isinstance(locale, (_str, _unicode)): - # convert to string - locale = normalize(_build_localename(locale)) - return _setlocale(category, locale) - -def resetlocale(category=LC_ALL): - - """ Sets the locale for category to the default setting. - - The default setting is determined by calling - getdefaultlocale(). category defaults to LC_ALL. - - """ - _setlocale(category, _build_localename(getdefaultlocale())) - -if sys.platform.startswith("win"): - # On Win32, this will return the ANSI code page - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using.""" - import _locale - return _locale._getdefaultlocale()[1] -else: - # On Unix, if CODESET is available, use that. 
- try: - CODESET - except NameError: - # Fall back to parsing environment variables :-( - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - by looking at environment variables.""" - return getdefaultlocale()[1] - else: - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - according to the system configuration.""" - if do_setlocale: - oldloc = setlocale(LC_CTYPE) - try: - setlocale(LC_CTYPE, "") - except Error: - pass - result = nl_langinfo(CODESET) - setlocale(LC_CTYPE, oldloc) - return result - else: - return nl_langinfo(CODESET) - - -### Database -# -# The following data was extracted from the locale.alias file which -# comes with X11 and then hand edited removing the explicit encoding -# definitions and adding some more aliases. The file is usually -# available as /usr/lib/X11/locale/locale.alias. -# - -# -# The local_encoding_alias table maps lowercase encoding alias names -# to C locale encoding names (case-sensitive). Note that normalize() -# first looks up the encoding in the encodings.aliases dictionary and -# then applies this mapping to find the correct C lib name for the -# encoding. 
-# -locale_encoding_alias = { - - # Mappings for non-standard encoding names used in locale names - '437': 'C', - 'c': 'C', - 'en': 'ISO8859-1', - 'jis': 'JIS7', - 'jis7': 'JIS7', - 'ajec': 'eucJP', - - # Mappings from Python codec names to C lib encoding names - 'ascii': 'ISO8859-1', - 'latin_1': 'ISO8859-1', - 'iso8859_1': 'ISO8859-1', - 'iso8859_10': 'ISO8859-10', - 'iso8859_11': 'ISO8859-11', - 'iso8859_13': 'ISO8859-13', - 'iso8859_14': 'ISO8859-14', - 'iso8859_15': 'ISO8859-15', - 'iso8859_16': 'ISO8859-16', - 'iso8859_2': 'ISO8859-2', - 'iso8859_3': 'ISO8859-3', - 'iso8859_4': 'ISO8859-4', - 'iso8859_5': 'ISO8859-5', - 'iso8859_6': 'ISO8859-6', - 'iso8859_7': 'ISO8859-7', - 'iso8859_8': 'ISO8859-8', - 'iso8859_9': 'ISO8859-9', - 'iso2022_jp': 'JIS7', - 'shift_jis': 'SJIS', - 'tactis': 'TACTIS', - 'euc_jp': 'eucJP', - 'euc_kr': 'eucKR', - 'utf_8': 'UTF-8', - 'koi8_r': 'KOI8-R', - 'koi8_u': 'KOI8-U', - # XXX This list is still incomplete. If you know more - # mappings, please file a bug report. Thanks. -} - -# -# The locale_alias table maps lowercase alias names to C locale names -# (case-sensitive). Encodings are always separated from the locale -# name using a dot ('.'); they should only be given in case the -# language name is needed to interpret the given encoding alias -# correctly (CJK codes often have this need). -# -# Note that the normalize() function which uses this tables -# removes '_' and '-' characters from the encoding part of the -# locale name before doing the lookup. This saves a lot of -# space in the table. -# -# MAL 2004-12-10: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. 
-# -# These are the differences compared to the old mapping (Python 2.4 -# and older): -# -# updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' -# updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' -# updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' -# updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1' -# updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' -# updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' -# updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' -# updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' -# updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP' -# updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13' -# updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13' -# updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' -# updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' -# updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11' -# updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312' -# updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5' -# updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5' -# -# MAL 2008-05-30: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. 
-# -# These are the differences compared to the old mapping (Python 2.5 -# and older): -# -# updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2' -# updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2' -# updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' -# updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' -# updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8' -# updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# -# AP 2010-04-12: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. 
-# -# These are the differences compared to the old mapping (Python 2.6.5 -# and older): -# -# updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' -# updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' -# updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' -# updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' -# updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin' -# updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin' -# updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8' -# updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' -# -# SS 2013-12-20: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. -# -# These are the differences compared to the old mapping (Python 2.7.6 -# and older): -# -# updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' -# updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' -# updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' -# updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8' -# updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' -# updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8' -# updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' -# updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' -# -# SS 2014-10-01: -# Updated alias mapping with glibc 2.19 supported locales. 
- -locale_alias = { - 'a3': 'az_AZ.KOI8-C', - 'a3_az': 'az_AZ.KOI8-C', - 'a3_az.koi8c': 'az_AZ.KOI8-C', - 'a3_az.koic': 'az_AZ.KOI8-C', - 'aa_dj': 'aa_DJ.ISO8859-1', - 'aa_er': 'aa_ER.UTF-8', - 'aa_et': 'aa_ET.UTF-8', - 'af': 'af_ZA.ISO8859-1', - 'af_za': 'af_ZA.ISO8859-1', - 'af_za.iso88591': 'af_ZA.ISO8859-1', - 'am': 'am_ET.UTF-8', - 'am_et': 'am_ET.UTF-8', - 'american': 'en_US.ISO8859-1', - 'american.iso88591': 'en_US.ISO8859-1', - 'an_es': 'an_ES.ISO8859-15', - 'ar': 'ar_AA.ISO8859-6', - 'ar_aa': 'ar_AA.ISO8859-6', - 'ar_aa.iso88596': 'ar_AA.ISO8859-6', - 'ar_ae': 'ar_AE.ISO8859-6', - 'ar_ae.iso88596': 'ar_AE.ISO8859-6', - 'ar_bh': 'ar_BH.ISO8859-6', - 'ar_bh.iso88596': 'ar_BH.ISO8859-6', - 'ar_dz': 'ar_DZ.ISO8859-6', - 'ar_dz.iso88596': 'ar_DZ.ISO8859-6', - 'ar_eg': 'ar_EG.ISO8859-6', - 'ar_eg.iso88596': 'ar_EG.ISO8859-6', - 'ar_in': 'ar_IN.UTF-8', - 'ar_iq': 'ar_IQ.ISO8859-6', - 'ar_iq.iso88596': 'ar_IQ.ISO8859-6', - 'ar_jo': 'ar_JO.ISO8859-6', - 'ar_jo.iso88596': 'ar_JO.ISO8859-6', - 'ar_kw': 'ar_KW.ISO8859-6', - 'ar_kw.iso88596': 'ar_KW.ISO8859-6', - 'ar_lb': 'ar_LB.ISO8859-6', - 'ar_lb.iso88596': 'ar_LB.ISO8859-6', - 'ar_ly': 'ar_LY.ISO8859-6', - 'ar_ly.iso88596': 'ar_LY.ISO8859-6', - 'ar_ma': 'ar_MA.ISO8859-6', - 'ar_ma.iso88596': 'ar_MA.ISO8859-6', - 'ar_om': 'ar_OM.ISO8859-6', - 'ar_om.iso88596': 'ar_OM.ISO8859-6', - 'ar_qa': 'ar_QA.ISO8859-6', - 'ar_qa.iso88596': 'ar_QA.ISO8859-6', - 'ar_sa': 'ar_SA.ISO8859-6', - 'ar_sa.iso88596': 'ar_SA.ISO8859-6', - 'ar_sd': 'ar_SD.ISO8859-6', - 'ar_sd.iso88596': 'ar_SD.ISO8859-6', - 'ar_sy': 'ar_SY.ISO8859-6', - 'ar_sy.iso88596': 'ar_SY.ISO8859-6', - 'ar_tn': 'ar_TN.ISO8859-6', - 'ar_tn.iso88596': 'ar_TN.ISO8859-6', - 'ar_ye': 'ar_YE.ISO8859-6', - 'ar_ye.iso88596': 'ar_YE.ISO8859-6', - 'arabic': 'ar_AA.ISO8859-6', - 'arabic.iso88596': 'ar_AA.ISO8859-6', - 'as': 'as_IN.UTF-8', - 'as_in': 'as_IN.UTF-8', - 'ast_es': 'ast_ES.ISO8859-15', - 'ayc_pe': 'ayc_PE.UTF-8', - 'az': 'az_AZ.ISO8859-9E', - 'az_az': 
'az_AZ.ISO8859-9E', - 'az_az.iso88599e': 'az_AZ.ISO8859-9E', - 'be': 'be_BY.CP1251', - 'be@latin': 'be_BY.UTF-8@latin', - 'be_bg.utf8': 'bg_BG.UTF-8', - 'be_by': 'be_BY.CP1251', - 'be_by.cp1251': 'be_BY.CP1251', - 'be_by.microsoftcp1251': 'be_BY.CP1251', - 'be_by.utf8@latin': 'be_BY.UTF-8@latin', - 'be_by@latin': 'be_BY.UTF-8@latin', - 'bem_zm': 'bem_ZM.UTF-8', - 'ber_dz': 'ber_DZ.UTF-8', - 'ber_ma': 'ber_MA.UTF-8', - 'bg': 'bg_BG.CP1251', - 'bg_bg': 'bg_BG.CP1251', - 'bg_bg.cp1251': 'bg_BG.CP1251', - 'bg_bg.iso88595': 'bg_BG.ISO8859-5', - 'bg_bg.koi8r': 'bg_BG.KOI8-R', - 'bg_bg.microsoftcp1251': 'bg_BG.CP1251', - 'bho_in': 'bho_IN.UTF-8', - 'bn_bd': 'bn_BD.UTF-8', - 'bn_in': 'bn_IN.UTF-8', - 'bo_cn': 'bo_CN.UTF-8', - 'bo_in': 'bo_IN.UTF-8', - 'bokmal': 'nb_NO.ISO8859-1', - 'bokm\xe5l': 'nb_NO.ISO8859-1', - 'br': 'br_FR.ISO8859-1', - 'br_fr': 'br_FR.ISO8859-1', - 'br_fr.iso88591': 'br_FR.ISO8859-1', - 'br_fr.iso885914': 'br_FR.ISO8859-14', - 'br_fr.iso885915': 'br_FR.ISO8859-15', - 'br_fr.iso885915@euro': 'br_FR.ISO8859-15', - 'br_fr.utf8@euro': 'br_FR.UTF-8', - 'br_fr@euro': 'br_FR.ISO8859-15', - 'brx_in': 'brx_IN.UTF-8', - 'bs': 'bs_BA.ISO8859-2', - 'bs_ba': 'bs_BA.ISO8859-2', - 'bs_ba.iso88592': 'bs_BA.ISO8859-2', - 'bulgarian': 'bg_BG.CP1251', - 'byn_er': 'byn_ER.UTF-8', - 'c': 'C', - 'c-french': 'fr_CA.ISO8859-1', - 'c-french.iso88591': 'fr_CA.ISO8859-1', - 'c.ascii': 'C', - 'c.en': 'C', - 'c.iso88591': 'en_US.ISO8859-1', - 'c.utf8': 'en_US.UTF-8', - 'c_c': 'C', - 'c_c.c': 'C', - 'ca': 'ca_ES.ISO8859-1', - 'ca_ad': 'ca_AD.ISO8859-1', - 'ca_ad.iso88591': 'ca_AD.ISO8859-1', - 'ca_ad.iso885915': 'ca_AD.ISO8859-15', - 'ca_ad.iso885915@euro': 'ca_AD.ISO8859-15', - 'ca_ad.utf8@euro': 'ca_AD.UTF-8', - 'ca_ad@euro': 'ca_AD.ISO8859-15', - 'ca_es': 'ca_ES.ISO8859-1', - 'ca_es.iso88591': 'ca_ES.ISO8859-1', - 'ca_es.iso885915': 'ca_ES.ISO8859-15', - 'ca_es.iso885915@euro': 'ca_ES.ISO8859-15', - 'ca_es.utf8@euro': 'ca_ES.UTF-8', - 'ca_es@valencia': 
'ca_ES.ISO8859-15@valencia', - 'ca_es@euro': 'ca_ES.ISO8859-15', - 'ca_fr': 'ca_FR.ISO8859-1', - 'ca_fr.iso88591': 'ca_FR.ISO8859-1', - 'ca_fr.iso885915': 'ca_FR.ISO8859-15', - 'ca_fr.iso885915@euro': 'ca_FR.ISO8859-15', - 'ca_fr.utf8@euro': 'ca_FR.UTF-8', - 'ca_fr@euro': 'ca_FR.ISO8859-15', - 'ca_it': 'ca_IT.ISO8859-1', - 'ca_it.iso88591': 'ca_IT.ISO8859-1', - 'ca_it.iso885915': 'ca_IT.ISO8859-15', - 'ca_it.iso885915@euro': 'ca_IT.ISO8859-15', - 'ca_it.utf8@euro': 'ca_IT.UTF-8', - 'ca_it@euro': 'ca_IT.ISO8859-15', - 'catalan': 'ca_ES.ISO8859-1', - 'cextend': 'en_US.ISO8859-1', - 'cextend.en': 'en_US.ISO8859-1', - 'chinese-s': 'zh_CN.eucCN', - 'chinese-t': 'zh_TW.eucTW', - 'crh_ua': 'crh_UA.UTF-8', - 'croatian': 'hr_HR.ISO8859-2', - 'cs': 'cs_CZ.ISO8859-2', - 'cs_cs': 'cs_CZ.ISO8859-2', - 'cs_cs.iso88592': 'cs_CZ.ISO8859-2', - 'cs_cz': 'cs_CZ.ISO8859-2', - 'cs_cz.iso88592': 'cs_CZ.ISO8859-2', - 'csb_pl': 'csb_PL.UTF-8', - 'cv_ru': 'cv_RU.UTF-8', - 'cy': 'cy_GB.ISO8859-1', - 'cy_gb': 'cy_GB.ISO8859-1', - 'cy_gb.iso88591': 'cy_GB.ISO8859-1', - 'cy_gb.iso885914': 'cy_GB.ISO8859-14', - 'cy_gb.iso885915': 'cy_GB.ISO8859-15', - 'cy_gb@euro': 'cy_GB.ISO8859-15', - 'cz': 'cs_CZ.ISO8859-2', - 'cz_cz': 'cs_CZ.ISO8859-2', - 'czech': 'cs_CZ.ISO8859-2', - 'da': 'da_DK.ISO8859-1', - 'da.iso885915': 'da_DK.ISO8859-15', - 'da_dk': 'da_DK.ISO8859-1', - 'da_dk.88591': 'da_DK.ISO8859-1', - 'da_dk.885915': 'da_DK.ISO8859-15', - 'da_dk.iso88591': 'da_DK.ISO8859-1', - 'da_dk.iso885915': 'da_DK.ISO8859-15', - 'da_dk@euro': 'da_DK.ISO8859-15', - 'danish': 'da_DK.ISO8859-1', - 'danish.iso88591': 'da_DK.ISO8859-1', - 'dansk': 'da_DK.ISO8859-1', - 'de': 'de_DE.ISO8859-1', - 'de.iso885915': 'de_DE.ISO8859-15', - 'de_at': 'de_AT.ISO8859-1', - 'de_at.iso88591': 'de_AT.ISO8859-1', - 'de_at.iso885915': 'de_AT.ISO8859-15', - 'de_at.iso885915@euro': 'de_AT.ISO8859-15', - 'de_at.utf8@euro': 'de_AT.UTF-8', - 'de_at@euro': 'de_AT.ISO8859-15', - 'de_be': 'de_BE.ISO8859-1', - 'de_be.iso88591': 
'de_BE.ISO8859-1', - 'de_be.iso885915': 'de_BE.ISO8859-15', - 'de_be.iso885915@euro': 'de_BE.ISO8859-15', - 'de_be.utf8@euro': 'de_BE.UTF-8', - 'de_be@euro': 'de_BE.ISO8859-15', - 'de_ch': 'de_CH.ISO8859-1', - 'de_ch.iso88591': 'de_CH.ISO8859-1', - 'de_ch.iso885915': 'de_CH.ISO8859-15', - 'de_ch@euro': 'de_CH.ISO8859-15', - 'de_de': 'de_DE.ISO8859-1', - 'de_de.88591': 'de_DE.ISO8859-1', - 'de_de.885915': 'de_DE.ISO8859-15', - 'de_de.885915@euro': 'de_DE.ISO8859-15', - 'de_de.iso88591': 'de_DE.ISO8859-1', - 'de_de.iso885915': 'de_DE.ISO8859-15', - 'de_de.iso885915@euro': 'de_DE.ISO8859-15', - 'de_de.utf8@euro': 'de_DE.UTF-8', - 'de_de@euro': 'de_DE.ISO8859-15', - 'de_li.utf8': 'de_LI.UTF-8', - 'de_lu': 'de_LU.ISO8859-1', - 'de_lu.iso88591': 'de_LU.ISO8859-1', - 'de_lu.iso885915': 'de_LU.ISO8859-15', - 'de_lu.iso885915@euro': 'de_LU.ISO8859-15', - 'de_lu.utf8@euro': 'de_LU.UTF-8', - 'de_lu@euro': 'de_LU.ISO8859-15', - 'deutsch': 'de_DE.ISO8859-1', - 'doi_in': 'doi_IN.UTF-8', - 'dutch': 'nl_NL.ISO8859-1', - 'dutch.iso88591': 'nl_BE.ISO8859-1', - 'dv_mv': 'dv_MV.UTF-8', - 'dz_bt': 'dz_BT.UTF-8', - 'ee': 'ee_EE.ISO8859-4', - 'ee_ee': 'ee_EE.ISO8859-4', - 'ee_ee.iso88594': 'ee_EE.ISO8859-4', - 'eesti': 'et_EE.ISO8859-1', - 'el': 'el_GR.ISO8859-7', - 'el_cy': 'el_CY.ISO8859-7', - 'el_gr': 'el_GR.ISO8859-7', - 'el_gr.iso88597': 'el_GR.ISO8859-7', - 'el_gr@euro': 'el_GR.ISO8859-15', - 'en': 'en_US.ISO8859-1', - 'en.iso88591': 'en_US.ISO8859-1', - 'en_ag': 'en_AG.UTF-8', - 'en_au': 'en_AU.ISO8859-1', - 'en_au.iso88591': 'en_AU.ISO8859-1', - 'en_be': 'en_BE.ISO8859-1', - 'en_be@euro': 'en_BE.ISO8859-15', - 'en_bw': 'en_BW.ISO8859-1', - 'en_bw.iso88591': 'en_BW.ISO8859-1', - 'en_ca': 'en_CA.ISO8859-1', - 'en_ca.iso88591': 'en_CA.ISO8859-1', - 'en_dk': 'en_DK.ISO8859-1', - 'en_dl.utf8': 'en_DL.UTF-8', - 'en_gb': 'en_GB.ISO8859-1', - 'en_gb.88591': 'en_GB.ISO8859-1', - 'en_gb.iso88591': 'en_GB.ISO8859-1', - 'en_gb.iso885915': 'en_GB.ISO8859-15', - 'en_gb@euro': 
'en_GB.ISO8859-15', - 'en_hk': 'en_HK.ISO8859-1', - 'en_hk.iso88591': 'en_HK.ISO8859-1', - 'en_ie': 'en_IE.ISO8859-1', - 'en_ie.iso88591': 'en_IE.ISO8859-1', - 'en_ie.iso885915': 'en_IE.ISO8859-15', - 'en_ie.iso885915@euro': 'en_IE.ISO8859-15', - 'en_ie.utf8@euro': 'en_IE.UTF-8', - 'en_ie@euro': 'en_IE.ISO8859-15', - 'en_in': 'en_IN.ISO8859-1', - 'en_ng': 'en_NG.UTF-8', - 'en_nz': 'en_NZ.ISO8859-1', - 'en_nz.iso88591': 'en_NZ.ISO8859-1', - 'en_ph': 'en_PH.ISO8859-1', - 'en_ph.iso88591': 'en_PH.ISO8859-1', - 'en_sg': 'en_SG.ISO8859-1', - 'en_sg.iso88591': 'en_SG.ISO8859-1', - 'en_uk': 'en_GB.ISO8859-1', - 'en_us': 'en_US.ISO8859-1', - 'en_us.88591': 'en_US.ISO8859-1', - 'en_us.885915': 'en_US.ISO8859-15', - 'en_us.iso88591': 'en_US.ISO8859-1', - 'en_us.iso885915': 'en_US.ISO8859-15', - 'en_us.iso885915@euro': 'en_US.ISO8859-15', - 'en_us@euro': 'en_US.ISO8859-15', - 'en_us@euro@euro': 'en_US.ISO8859-15', - 'en_za': 'en_ZA.ISO8859-1', - 'en_za.88591': 'en_ZA.ISO8859-1', - 'en_za.iso88591': 'en_ZA.ISO8859-1', - 'en_za.iso885915': 'en_ZA.ISO8859-15', - 'en_za@euro': 'en_ZA.ISO8859-15', - 'en_zm': 'en_ZM.UTF-8', - 'en_zw': 'en_ZW.ISO8859-1', - 'en_zw.iso88591': 'en_ZW.ISO8859-1', - 'en_zw.utf8': 'en_ZS.UTF-8', - 'eng_gb': 'en_GB.ISO8859-1', - 'eng_gb.8859': 'en_GB.ISO8859-1', - 'english': 'en_EN.ISO8859-1', - 'english.iso88591': 'en_EN.ISO8859-1', - 'english_uk': 'en_GB.ISO8859-1', - 'english_uk.8859': 'en_GB.ISO8859-1', - 'english_united-states': 'en_US.ISO8859-1', - 'english_united-states.437': 'C', - 'english_us': 'en_US.ISO8859-1', - 'english_us.8859': 'en_US.ISO8859-1', - 'english_us.ascii': 'en_US.ISO8859-1', - 'eo': 'eo_XX.ISO8859-3', - 'eo.utf8': 'eo.UTF-8', - 'eo_eo': 'eo_EO.ISO8859-3', - 'eo_eo.iso88593': 'eo_EO.ISO8859-3', - 'eo_us.utf8': 'eo_US.UTF-8', - 'eo_xx': 'eo_XX.ISO8859-3', - 'eo_xx.iso88593': 'eo_XX.ISO8859-3', - 'es': 'es_ES.ISO8859-1', - 'es_ar': 'es_AR.ISO8859-1', - 'es_ar.iso88591': 'es_AR.ISO8859-1', - 'es_bo': 'es_BO.ISO8859-1', - 
'es_bo.iso88591': 'es_BO.ISO8859-1', - 'es_cl': 'es_CL.ISO8859-1', - 'es_cl.iso88591': 'es_CL.ISO8859-1', - 'es_co': 'es_CO.ISO8859-1', - 'es_co.iso88591': 'es_CO.ISO8859-1', - 'es_cr': 'es_CR.ISO8859-1', - 'es_cr.iso88591': 'es_CR.ISO8859-1', - 'es_cu': 'es_CU.UTF-8', - 'es_do': 'es_DO.ISO8859-1', - 'es_do.iso88591': 'es_DO.ISO8859-1', - 'es_ec': 'es_EC.ISO8859-1', - 'es_ec.iso88591': 'es_EC.ISO8859-1', - 'es_es': 'es_ES.ISO8859-1', - 'es_es.88591': 'es_ES.ISO8859-1', - 'es_es.iso88591': 'es_ES.ISO8859-1', - 'es_es.iso885915': 'es_ES.ISO8859-15', - 'es_es.iso885915@euro': 'es_ES.ISO8859-15', - 'es_es.utf8@euro': 'es_ES.UTF-8', - 'es_es@euro': 'es_ES.ISO8859-15', - 'es_gt': 'es_GT.ISO8859-1', - 'es_gt.iso88591': 'es_GT.ISO8859-1', - 'es_hn': 'es_HN.ISO8859-1', - 'es_hn.iso88591': 'es_HN.ISO8859-1', - 'es_mx': 'es_MX.ISO8859-1', - 'es_mx.iso88591': 'es_MX.ISO8859-1', - 'es_ni': 'es_NI.ISO8859-1', - 'es_ni.iso88591': 'es_NI.ISO8859-1', - 'es_pa': 'es_PA.ISO8859-1', - 'es_pa.iso88591': 'es_PA.ISO8859-1', - 'es_pa.iso885915': 'es_PA.ISO8859-15', - 'es_pa@euro': 'es_PA.ISO8859-15', - 'es_pe': 'es_PE.ISO8859-1', - 'es_pe.iso88591': 'es_PE.ISO8859-1', - 'es_pe.iso885915': 'es_PE.ISO8859-15', - 'es_pe@euro': 'es_PE.ISO8859-15', - 'es_pr': 'es_PR.ISO8859-1', - 'es_pr.iso88591': 'es_PR.ISO8859-1', - 'es_py': 'es_PY.ISO8859-1', - 'es_py.iso88591': 'es_PY.ISO8859-1', - 'es_py.iso885915': 'es_PY.ISO8859-15', - 'es_py@euro': 'es_PY.ISO8859-15', - 'es_sv': 'es_SV.ISO8859-1', - 'es_sv.iso88591': 'es_SV.ISO8859-1', - 'es_sv.iso885915': 'es_SV.ISO8859-15', - 'es_sv@euro': 'es_SV.ISO8859-15', - 'es_us': 'es_US.ISO8859-1', - 'es_us.iso88591': 'es_US.ISO8859-1', - 'es_uy': 'es_UY.ISO8859-1', - 'es_uy.iso88591': 'es_UY.ISO8859-1', - 'es_uy.iso885915': 'es_UY.ISO8859-15', - 'es_uy@euro': 'es_UY.ISO8859-15', - 'es_ve': 'es_VE.ISO8859-1', - 'es_ve.iso88591': 'es_VE.ISO8859-1', - 'es_ve.iso885915': 'es_VE.ISO8859-15', - 'es_ve@euro': 'es_VE.ISO8859-15', - 'estonian': 'et_EE.ISO8859-1', - 
'et': 'et_EE.ISO8859-15', - 'et_ee': 'et_EE.ISO8859-15', - 'et_ee.iso88591': 'et_EE.ISO8859-1', - 'et_ee.iso885913': 'et_EE.ISO8859-13', - 'et_ee.iso885915': 'et_EE.ISO8859-15', - 'et_ee.iso88594': 'et_EE.ISO8859-4', - 'et_ee@euro': 'et_EE.ISO8859-15', - 'eu': 'eu_ES.ISO8859-1', - 'eu_es': 'eu_ES.ISO8859-1', - 'eu_es.iso88591': 'eu_ES.ISO8859-1', - 'eu_es.iso885915': 'eu_ES.ISO8859-15', - 'eu_es.iso885915@euro': 'eu_ES.ISO8859-15', - 'eu_es.utf8@euro': 'eu_ES.UTF-8', - 'eu_es@euro': 'eu_ES.ISO8859-15', - 'eu_fr': 'eu_FR.ISO8859-1', - 'fa': 'fa_IR.UTF-8', - 'fa_ir': 'fa_IR.UTF-8', - 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342', - 'ff_sn': 'ff_SN.UTF-8', - 'fi': 'fi_FI.ISO8859-15', - 'fi.iso885915': 'fi_FI.ISO8859-15', - 'fi_fi': 'fi_FI.ISO8859-15', - 'fi_fi.88591': 'fi_FI.ISO8859-1', - 'fi_fi.iso88591': 'fi_FI.ISO8859-1', - 'fi_fi.iso885915': 'fi_FI.ISO8859-15', - 'fi_fi.iso885915@euro': 'fi_FI.ISO8859-15', - 'fi_fi.utf8@euro': 'fi_FI.UTF-8', - 'fi_fi@euro': 'fi_FI.ISO8859-15', - 'fil_ph': 'fil_PH.UTF-8', - 'finnish': 'fi_FI.ISO8859-1', - 'finnish.iso88591': 'fi_FI.ISO8859-1', - 'fo': 'fo_FO.ISO8859-1', - 'fo_fo': 'fo_FO.ISO8859-1', - 'fo_fo.iso88591': 'fo_FO.ISO8859-1', - 'fo_fo.iso885915': 'fo_FO.ISO8859-15', - 'fo_fo@euro': 'fo_FO.ISO8859-15', - 'fr': 'fr_FR.ISO8859-1', - 'fr.iso885915': 'fr_FR.ISO8859-15', - 'fr_be': 'fr_BE.ISO8859-1', - 'fr_be.88591': 'fr_BE.ISO8859-1', - 'fr_be.iso88591': 'fr_BE.ISO8859-1', - 'fr_be.iso885915': 'fr_BE.ISO8859-15', - 'fr_be.iso885915@euro': 'fr_BE.ISO8859-15', - 'fr_be.utf8@euro': 'fr_BE.UTF-8', - 'fr_be@euro': 'fr_BE.ISO8859-15', - 'fr_ca': 'fr_CA.ISO8859-1', - 'fr_ca.88591': 'fr_CA.ISO8859-1', - 'fr_ca.iso88591': 'fr_CA.ISO8859-1', - 'fr_ca.iso885915': 'fr_CA.ISO8859-15', - 'fr_ca@euro': 'fr_CA.ISO8859-15', - 'fr_ch': 'fr_CH.ISO8859-1', - 'fr_ch.88591': 'fr_CH.ISO8859-1', - 'fr_ch.iso88591': 'fr_CH.ISO8859-1', - 'fr_ch.iso885915': 'fr_CH.ISO8859-15', - 'fr_ch@euro': 'fr_CH.ISO8859-15', - 'fr_fr': 'fr_FR.ISO8859-1', - 'fr_fr.88591': 
'fr_FR.ISO8859-1', - 'fr_fr.iso88591': 'fr_FR.ISO8859-1', - 'fr_fr.iso885915': 'fr_FR.ISO8859-15', - 'fr_fr.iso885915@euro': 'fr_FR.ISO8859-15', - 'fr_fr.utf8@euro': 'fr_FR.UTF-8', - 'fr_fr@euro': 'fr_FR.ISO8859-15', - 'fr_lu': 'fr_LU.ISO8859-1', - 'fr_lu.88591': 'fr_LU.ISO8859-1', - 'fr_lu.iso88591': 'fr_LU.ISO8859-1', - 'fr_lu.iso885915': 'fr_LU.ISO8859-15', - 'fr_lu.iso885915@euro': 'fr_LU.ISO8859-15', - 'fr_lu.utf8@euro': 'fr_LU.UTF-8', - 'fr_lu@euro': 'fr_LU.ISO8859-15', - 'fran\xe7ais': 'fr_FR.ISO8859-1', - 'fre_fr': 'fr_FR.ISO8859-1', - 'fre_fr.8859': 'fr_FR.ISO8859-1', - 'french': 'fr_FR.ISO8859-1', - 'french.iso88591': 'fr_CH.ISO8859-1', - 'french_france': 'fr_FR.ISO8859-1', - 'french_france.8859': 'fr_FR.ISO8859-1', - 'fur_it': 'fur_IT.UTF-8', - 'fy_de': 'fy_DE.UTF-8', - 'fy_nl': 'fy_NL.UTF-8', - 'ga': 'ga_IE.ISO8859-1', - 'ga_ie': 'ga_IE.ISO8859-1', - 'ga_ie.iso88591': 'ga_IE.ISO8859-1', - 'ga_ie.iso885914': 'ga_IE.ISO8859-14', - 'ga_ie.iso885915': 'ga_IE.ISO8859-15', - 'ga_ie.iso885915@euro': 'ga_IE.ISO8859-15', - 'ga_ie.utf8@euro': 'ga_IE.UTF-8', - 'ga_ie@euro': 'ga_IE.ISO8859-15', - 'galego': 'gl_ES.ISO8859-1', - 'galician': 'gl_ES.ISO8859-1', - 'gd': 'gd_GB.ISO8859-1', - 'gd_gb': 'gd_GB.ISO8859-1', - 'gd_gb.iso88591': 'gd_GB.ISO8859-1', - 'gd_gb.iso885914': 'gd_GB.ISO8859-14', - 'gd_gb.iso885915': 'gd_GB.ISO8859-15', - 'gd_gb@euro': 'gd_GB.ISO8859-15', - 'ger_de': 'de_DE.ISO8859-1', - 'ger_de.8859': 'de_DE.ISO8859-1', - 'german': 'de_DE.ISO8859-1', - 'german.iso88591': 'de_CH.ISO8859-1', - 'german_germany': 'de_DE.ISO8859-1', - 'german_germany.8859': 'de_DE.ISO8859-1', - 'gez_er': 'gez_ER.UTF-8', - 'gez_et': 'gez_ET.UTF-8', - 'gl': 'gl_ES.ISO8859-1', - 'gl_es': 'gl_ES.ISO8859-1', - 'gl_es.iso88591': 'gl_ES.ISO8859-1', - 'gl_es.iso885915': 'gl_ES.ISO8859-15', - 'gl_es.iso885915@euro': 'gl_ES.ISO8859-15', - 'gl_es.utf8@euro': 'gl_ES.UTF-8', - 'gl_es@euro': 'gl_ES.ISO8859-15', - 'greek': 'el_GR.ISO8859-7', - 'greek.iso88597': 'el_GR.ISO8859-7', - 
'gu_in': 'gu_IN.UTF-8', - 'gv': 'gv_GB.ISO8859-1', - 'gv_gb': 'gv_GB.ISO8859-1', - 'gv_gb.iso88591': 'gv_GB.ISO8859-1', - 'gv_gb.iso885914': 'gv_GB.ISO8859-14', - 'gv_gb.iso885915': 'gv_GB.ISO8859-15', - 'gv_gb@euro': 'gv_GB.ISO8859-15', - 'ha_ng': 'ha_NG.UTF-8', - 'he': 'he_IL.ISO8859-8', - 'he_il': 'he_IL.ISO8859-8', - 'he_il.cp1255': 'he_IL.CP1255', - 'he_il.iso88598': 'he_IL.ISO8859-8', - 'he_il.microsoftcp1255': 'he_IL.CP1255', - 'hebrew': 'he_IL.ISO8859-8', - 'hebrew.iso88598': 'he_IL.ISO8859-8', - 'hi': 'hi_IN.ISCII-DEV', - 'hi_in': 'hi_IN.ISCII-DEV', - 'hi_in.isciidev': 'hi_IN.ISCII-DEV', - 'hne': 'hne_IN.UTF-8', - 'hne_in': 'hne_IN.UTF-8', - 'hr': 'hr_HR.ISO8859-2', - 'hr_hr': 'hr_HR.ISO8859-2', - 'hr_hr.iso88592': 'hr_HR.ISO8859-2', - 'hrvatski': 'hr_HR.ISO8859-2', - 'hsb_de': 'hsb_DE.ISO8859-2', - 'ht_ht': 'ht_HT.UTF-8', - 'hu': 'hu_HU.ISO8859-2', - 'hu_hu': 'hu_HU.ISO8859-2', - 'hu_hu.iso88592': 'hu_HU.ISO8859-2', - 'hungarian': 'hu_HU.ISO8859-2', - 'hy_am': 'hy_AM.UTF-8', - 'hy_am.armscii8': 'hy_AM.ARMSCII_8', - 'ia': 'ia.UTF-8', - 'ia_fr': 'ia_FR.UTF-8', - 'icelandic': 'is_IS.ISO8859-1', - 'icelandic.iso88591': 'is_IS.ISO8859-1', - 'id': 'id_ID.ISO8859-1', - 'id_id': 'id_ID.ISO8859-1', - 'ig_ng': 'ig_NG.UTF-8', - 'ik_ca': 'ik_CA.UTF-8', - 'in': 'id_ID.ISO8859-1', - 'in_id': 'id_ID.ISO8859-1', - 'is': 'is_IS.ISO8859-1', - 'is_is': 'is_IS.ISO8859-1', - 'is_is.iso88591': 'is_IS.ISO8859-1', - 'is_is.iso885915': 'is_IS.ISO8859-15', - 'is_is@euro': 'is_IS.ISO8859-15', - 'iso-8859-1': 'en_US.ISO8859-1', - 'iso-8859-15': 'en_US.ISO8859-15', - 'iso8859-1': 'en_US.ISO8859-1', - 'iso8859-15': 'en_US.ISO8859-15', - 'iso_8859_1': 'en_US.ISO8859-1', - 'iso_8859_15': 'en_US.ISO8859-15', - 'it': 'it_IT.ISO8859-1', - 'it.iso885915': 'it_IT.ISO8859-15', - 'it_ch': 'it_CH.ISO8859-1', - 'it_ch.iso88591': 'it_CH.ISO8859-1', - 'it_ch.iso885915': 'it_CH.ISO8859-15', - 'it_ch@euro': 'it_CH.ISO8859-15', - 'it_it': 'it_IT.ISO8859-1', - 'it_it.88591': 'it_IT.ISO8859-1', - 
'it_it.iso88591': 'it_IT.ISO8859-1', - 'it_it.iso885915': 'it_IT.ISO8859-15', - 'it_it.iso885915@euro': 'it_IT.ISO8859-15', - 'it_it.utf8@euro': 'it_IT.UTF-8', - 'it_it@euro': 'it_IT.ISO8859-15', - 'italian': 'it_IT.ISO8859-1', - 'italian.iso88591': 'it_IT.ISO8859-1', - 'iu': 'iu_CA.NUNACOM-8', - 'iu_ca': 'iu_CA.NUNACOM-8', - 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8', - 'iw': 'he_IL.ISO8859-8', - 'iw_il': 'he_IL.ISO8859-8', - 'iw_il.iso88598': 'he_IL.ISO8859-8', - 'iw_il.utf8': 'iw_IL.UTF-8', - 'ja': 'ja_JP.eucJP', - 'ja.jis': 'ja_JP.JIS7', - 'ja.sjis': 'ja_JP.SJIS', - 'ja_jp': 'ja_JP.eucJP', - 'ja_jp.ajec': 'ja_JP.eucJP', - 'ja_jp.euc': 'ja_JP.eucJP', - 'ja_jp.eucjp': 'ja_JP.eucJP', - 'ja_jp.iso-2022-jp': 'ja_JP.JIS7', - 'ja_jp.iso2022jp': 'ja_JP.JIS7', - 'ja_jp.jis': 'ja_JP.JIS7', - 'ja_jp.jis7': 'ja_JP.JIS7', - 'ja_jp.mscode': 'ja_JP.SJIS', - 'ja_jp.pck': 'ja_JP.SJIS', - 'ja_jp.sjis': 'ja_JP.SJIS', - 'ja_jp.ujis': 'ja_JP.eucJP', - 'japan': 'ja_JP.eucJP', - 'japanese': 'ja_JP.eucJP', - 'japanese-euc': 'ja_JP.eucJP', - 'japanese.euc': 'ja_JP.eucJP', - 'japanese.sjis': 'ja_JP.SJIS', - 'jp_jp': 'ja_JP.eucJP', - 'ka': 'ka_GE.GEORGIAN-ACADEMY', - 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY', - 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY', - 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS', - 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY', - 'kk_kz': 'kk_KZ.RK1048', - 'kl': 'kl_GL.ISO8859-1', - 'kl_gl': 'kl_GL.ISO8859-1', - 'kl_gl.iso88591': 'kl_GL.ISO8859-1', - 'kl_gl.iso885915': 'kl_GL.ISO8859-15', - 'kl_gl@euro': 'kl_GL.ISO8859-15', - 'km_kh': 'km_KH.UTF-8', - 'kn': 'kn_IN.UTF-8', - 'kn_in': 'kn_IN.UTF-8', - 'ko': 'ko_KR.eucKR', - 'ko_kr': 'ko_KR.eucKR', - 'ko_kr.euc': 'ko_KR.eucKR', - 'ko_kr.euckr': 'ko_KR.eucKR', - 'kok_in': 'kok_IN.UTF-8', - 'korean': 'ko_KR.eucKR', - 'korean.euc': 'ko_KR.eucKR', - 'ks': 'ks_IN.UTF-8', - 'ks_in': 'ks_IN.UTF-8', - 'ks_in@devanagari': 'ks_IN.UTF-8@devanagari', - 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari', - 'ku_tr': 'ku_TR.ISO8859-9', - 'kw': 
'kw_GB.ISO8859-1', - 'kw_gb': 'kw_GB.ISO8859-1', - 'kw_gb.iso88591': 'kw_GB.ISO8859-1', - 'kw_gb.iso885914': 'kw_GB.ISO8859-14', - 'kw_gb.iso885915': 'kw_GB.ISO8859-15', - 'kw_gb@euro': 'kw_GB.ISO8859-15', - 'ky': 'ky_KG.UTF-8', - 'ky_kg': 'ky_KG.UTF-8', - 'lb_lu': 'lb_LU.UTF-8', - 'lg_ug': 'lg_UG.ISO8859-10', - 'li_be': 'li_BE.UTF-8', - 'li_nl': 'li_NL.UTF-8', - 'lij_it': 'lij_IT.UTF-8', - 'lithuanian': 'lt_LT.ISO8859-13', - 'lo': 'lo_LA.MULELAO-1', - 'lo_la': 'lo_LA.MULELAO-1', - 'lo_la.cp1133': 'lo_LA.IBM-CP1133', - 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133', - 'lo_la.mulelao1': 'lo_LA.MULELAO-1', - 'lt': 'lt_LT.ISO8859-13', - 'lt_lt': 'lt_LT.ISO8859-13', - 'lt_lt.iso885913': 'lt_LT.ISO8859-13', - 'lt_lt.iso88594': 'lt_LT.ISO8859-4', - 'lv': 'lv_LV.ISO8859-13', - 'lv_lv': 'lv_LV.ISO8859-13', - 'lv_lv.iso885913': 'lv_LV.ISO8859-13', - 'lv_lv.iso88594': 'lv_LV.ISO8859-4', - 'mag_in': 'mag_IN.UTF-8', - 'mai': 'mai_IN.UTF-8', - 'mai_in': 'mai_IN.UTF-8', - 'mg_mg': 'mg_MG.ISO8859-15', - 'mhr_ru': 'mhr_RU.UTF-8', - 'mi': 'mi_NZ.ISO8859-1', - 'mi_nz': 'mi_NZ.ISO8859-1', - 'mi_nz.iso88591': 'mi_NZ.ISO8859-1', - 'mk': 'mk_MK.ISO8859-5', - 'mk_mk': 'mk_MK.ISO8859-5', - 'mk_mk.cp1251': 'mk_MK.CP1251', - 'mk_mk.iso88595': 'mk_MK.ISO8859-5', - 'mk_mk.microsoftcp1251': 'mk_MK.CP1251', - 'ml': 'ml_IN.UTF-8', - 'ml_in': 'ml_IN.UTF-8', - 'mn_mn': 'mn_MN.UTF-8', - 'mni_in': 'mni_IN.UTF-8', - 'mr': 'mr_IN.UTF-8', - 'mr_in': 'mr_IN.UTF-8', - 'ms': 'ms_MY.ISO8859-1', - 'ms_my': 'ms_MY.ISO8859-1', - 'ms_my.iso88591': 'ms_MY.ISO8859-1', - 'mt': 'mt_MT.ISO8859-3', - 'mt_mt': 'mt_MT.ISO8859-3', - 'mt_mt.iso88593': 'mt_MT.ISO8859-3', - 'my_mm': 'my_MM.UTF-8', - 'nan_tw@latin': 'nan_TW.UTF-8@latin', - 'nb': 'nb_NO.ISO8859-1', - 'nb_no': 'nb_NO.ISO8859-1', - 'nb_no.88591': 'nb_NO.ISO8859-1', - 'nb_no.iso88591': 'nb_NO.ISO8859-1', - 'nb_no.iso885915': 'nb_NO.ISO8859-15', - 'nb_no@euro': 'nb_NO.ISO8859-15', - 'nds_de': 'nds_DE.UTF-8', - 'nds_nl': 'nds_NL.UTF-8', - 'ne_np': 'ne_NP.UTF-8', - 
'nhn_mx': 'nhn_MX.UTF-8', - 'niu_nu': 'niu_NU.UTF-8', - 'niu_nz': 'niu_NZ.UTF-8', - 'nl': 'nl_NL.ISO8859-1', - 'nl.iso885915': 'nl_NL.ISO8859-15', - 'nl_aw': 'nl_AW.UTF-8', - 'nl_be': 'nl_BE.ISO8859-1', - 'nl_be.88591': 'nl_BE.ISO8859-1', - 'nl_be.iso88591': 'nl_BE.ISO8859-1', - 'nl_be.iso885915': 'nl_BE.ISO8859-15', - 'nl_be.iso885915@euro': 'nl_BE.ISO8859-15', - 'nl_be.utf8@euro': 'nl_BE.UTF-8', - 'nl_be@euro': 'nl_BE.ISO8859-15', - 'nl_nl': 'nl_NL.ISO8859-1', - 'nl_nl.88591': 'nl_NL.ISO8859-1', - 'nl_nl.iso88591': 'nl_NL.ISO8859-1', - 'nl_nl.iso885915': 'nl_NL.ISO8859-15', - 'nl_nl.iso885915@euro': 'nl_NL.ISO8859-15', - 'nl_nl.utf8@euro': 'nl_NL.UTF-8', - 'nl_nl@euro': 'nl_NL.ISO8859-15', - 'nn': 'nn_NO.ISO8859-1', - 'nn_no': 'nn_NO.ISO8859-1', - 'nn_no.88591': 'nn_NO.ISO8859-1', - 'nn_no.iso88591': 'nn_NO.ISO8859-1', - 'nn_no.iso885915': 'nn_NO.ISO8859-15', - 'nn_no@euro': 'nn_NO.ISO8859-15', - 'no': 'no_NO.ISO8859-1', - 'no@nynorsk': 'ny_NO.ISO8859-1', - 'no_no': 'no_NO.ISO8859-1', - 'no_no.88591': 'no_NO.ISO8859-1', - 'no_no.iso88591': 'no_NO.ISO8859-1', - 'no_no.iso885915': 'no_NO.ISO8859-15', - 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1', - 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1', - 'no_no@euro': 'no_NO.ISO8859-15', - 'norwegian': 'no_NO.ISO8859-1', - 'norwegian.iso88591': 'no_NO.ISO8859-1', - 'nr': 'nr_ZA.ISO8859-1', - 'nr_za': 'nr_ZA.ISO8859-1', - 'nr_za.iso88591': 'nr_ZA.ISO8859-1', - 'nso': 'nso_ZA.ISO8859-15', - 'nso_za': 'nso_ZA.ISO8859-15', - 'nso_za.iso885915': 'nso_ZA.ISO8859-15', - 'ny': 'ny_NO.ISO8859-1', - 'ny_no': 'ny_NO.ISO8859-1', - 'ny_no.88591': 'ny_NO.ISO8859-1', - 'ny_no.iso88591': 'ny_NO.ISO8859-1', - 'ny_no.iso885915': 'ny_NO.ISO8859-15', - 'ny_no@euro': 'ny_NO.ISO8859-15', - 'nynorsk': 'nn_NO.ISO8859-1', - 'oc': 'oc_FR.ISO8859-1', - 'oc_fr': 'oc_FR.ISO8859-1', - 'oc_fr.iso88591': 'oc_FR.ISO8859-1', - 'oc_fr.iso885915': 'oc_FR.ISO8859-15', - 'oc_fr@euro': 'oc_FR.ISO8859-15', - 'om_et': 'om_ET.UTF-8', - 'om_ke': 'om_KE.ISO8859-1', - 
'or': 'or_IN.UTF-8', - 'or_in': 'or_IN.UTF-8', - 'os_ru': 'os_RU.UTF-8', - 'pa': 'pa_IN.UTF-8', - 'pa_in': 'pa_IN.UTF-8', - 'pa_pk': 'pa_PK.UTF-8', - 'pap_an': 'pap_AN.UTF-8', - 'pd': 'pd_US.ISO8859-1', - 'pd_de': 'pd_DE.ISO8859-1', - 'pd_de.iso88591': 'pd_DE.ISO8859-1', - 'pd_de.iso885915': 'pd_DE.ISO8859-15', - 'pd_de@euro': 'pd_DE.ISO8859-15', - 'pd_us': 'pd_US.ISO8859-1', - 'pd_us.iso88591': 'pd_US.ISO8859-1', - 'pd_us.iso885915': 'pd_US.ISO8859-15', - 'pd_us@euro': 'pd_US.ISO8859-15', - 'ph': 'ph_PH.ISO8859-1', - 'ph_ph': 'ph_PH.ISO8859-1', - 'ph_ph.iso88591': 'ph_PH.ISO8859-1', - 'pl': 'pl_PL.ISO8859-2', - 'pl_pl': 'pl_PL.ISO8859-2', - 'pl_pl.iso88592': 'pl_PL.ISO8859-2', - 'polish': 'pl_PL.ISO8859-2', - 'portuguese': 'pt_PT.ISO8859-1', - 'portuguese.iso88591': 'pt_PT.ISO8859-1', - 'portuguese_brazil': 'pt_BR.ISO8859-1', - 'portuguese_brazil.8859': 'pt_BR.ISO8859-1', - 'posix': 'C', - 'posix-utf2': 'C', - 'pp': 'pp_AN.ISO8859-1', - 'pp_an': 'pp_AN.ISO8859-1', - 'pp_an.iso88591': 'pp_AN.ISO8859-1', - 'ps_af': 'ps_AF.UTF-8', - 'pt': 'pt_PT.ISO8859-1', - 'pt.iso885915': 'pt_PT.ISO8859-15', - 'pt_br': 'pt_BR.ISO8859-1', - 'pt_br.88591': 'pt_BR.ISO8859-1', - 'pt_br.iso88591': 'pt_BR.ISO8859-1', - 'pt_br.iso885915': 'pt_BR.ISO8859-15', - 'pt_br@euro': 'pt_BR.ISO8859-15', - 'pt_pt': 'pt_PT.ISO8859-1', - 'pt_pt.88591': 'pt_PT.ISO8859-1', - 'pt_pt.iso88591': 'pt_PT.ISO8859-1', - 'pt_pt.iso885915': 'pt_PT.ISO8859-15', - 'pt_pt.iso885915@euro': 'pt_PT.ISO8859-15', - 'pt_pt.utf8@euro': 'pt_PT.UTF-8', - 'pt_pt@euro': 'pt_PT.ISO8859-15', - 'ro': 'ro_RO.ISO8859-2', - 'ro_ro': 'ro_RO.ISO8859-2', - 'ro_ro.iso88592': 'ro_RO.ISO8859-2', - 'romanian': 'ro_RO.ISO8859-2', - 'ru': 'ru_RU.UTF-8', - 'ru.koi8r': 'ru_RU.KOI8-R', - 'ru_ru': 'ru_RU.UTF-8', - 'ru_ru.cp1251': 'ru_RU.CP1251', - 'ru_ru.iso88595': 'ru_RU.ISO8859-5', - 'ru_ru.koi8r': 'ru_RU.KOI8-R', - 'ru_ru.microsoftcp1251': 'ru_RU.CP1251', - 'ru_ua': 'ru_UA.KOI8-U', - 'ru_ua.cp1251': 'ru_UA.CP1251', - 'ru_ua.koi8u': 
'ru_UA.KOI8-U', - 'ru_ua.microsoftcp1251': 'ru_UA.CP1251', - 'rumanian': 'ro_RO.ISO8859-2', - 'russian': 'ru_RU.ISO8859-5', - 'rw': 'rw_RW.ISO8859-1', - 'rw_rw': 'rw_RW.ISO8859-1', - 'rw_rw.iso88591': 'rw_RW.ISO8859-1', - 'sa_in': 'sa_IN.UTF-8', - 'sat_in': 'sat_IN.UTF-8', - 'sc_it': 'sc_IT.UTF-8', - 'sd': 'sd_IN.UTF-8', - 'sd@devanagari': 'sd_IN.UTF-8@devanagari', - 'sd_in': 'sd_IN.UTF-8', - 'sd_in@devanagari': 'sd_IN.UTF-8@devanagari', - 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari', - 'sd_pk': 'sd_PK.UTF-8', - 'se_no': 'se_NO.UTF-8', - 'serbocroatian': 'sr_RS.UTF-8@latin', - 'sh': 'sr_RS.UTF-8@latin', - 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2', - 'sh_hr': 'sh_HR.ISO8859-2', - 'sh_hr.iso88592': 'hr_HR.ISO8859-2', - 'sh_sp': 'sr_CS.ISO8859-2', - 'sh_yu': 'sr_RS.UTF-8@latin', - 'shs_ca': 'shs_CA.UTF-8', - 'si': 'si_LK.UTF-8', - 'si_lk': 'si_LK.UTF-8', - 'sid_et': 'sid_ET.UTF-8', - 'sinhala': 'si_LK.UTF-8', - 'sk': 'sk_SK.ISO8859-2', - 'sk_sk': 'sk_SK.ISO8859-2', - 'sk_sk.iso88592': 'sk_SK.ISO8859-2', - 'sl': 'sl_SI.ISO8859-2', - 'sl_cs': 'sl_CS.ISO8859-2', - 'sl_si': 'sl_SI.ISO8859-2', - 'sl_si.iso88592': 'sl_SI.ISO8859-2', - 'slovak': 'sk_SK.ISO8859-2', - 'slovene': 'sl_SI.ISO8859-2', - 'slovenian': 'sl_SI.ISO8859-2', - 'so_dj': 'so_DJ.ISO8859-1', - 'so_et': 'so_ET.UTF-8', - 'so_ke': 'so_KE.ISO8859-1', - 'so_so': 'so_SO.ISO8859-1', - 'sp': 'sr_CS.ISO8859-5', - 'sp_yu': 'sr_CS.ISO8859-5', - 'spanish': 'es_ES.ISO8859-1', - 'spanish.iso88591': 'es_ES.ISO8859-1', - 'spanish_spain': 'es_ES.ISO8859-1', - 'spanish_spain.8859': 'es_ES.ISO8859-1', - 'sq': 'sq_AL.ISO8859-2', - 'sq_al': 'sq_AL.ISO8859-2', - 'sq_al.iso88592': 'sq_AL.ISO8859-2', - 'sq_mk': 'sq_MK.UTF-8', - 'sr': 'sr_RS.UTF-8', - 'sr@cyrillic': 'sr_RS.UTF-8', - 'sr@latin': 'sr_RS.UTF-8@latin', - 'sr@latn': 'sr_CS.UTF-8@latin', - 'sr_cs': 'sr_CS.UTF-8', - 'sr_cs.iso88592': 'sr_CS.ISO8859-2', - 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2', - 'sr_cs.iso88595': 'sr_CS.ISO8859-5', - 'sr_cs.utf8@latn': 
'sr_CS.UTF-8@latin', - 'sr_cs@latn': 'sr_CS.UTF-8@latin', - 'sr_me': 'sr_ME.UTF-8', - 'sr_rs': 'sr_RS.UTF-8', - 'sr_rs@latin': 'sr_RS.UTF-8@latin', - 'sr_rs@latn': 'sr_RS.UTF-8@latin', - 'sr_sp': 'sr_CS.ISO8859-2', - 'sr_yu': 'sr_RS.UTF-8@latin', - 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251', - 'sr_yu.iso88592': 'sr_CS.ISO8859-2', - 'sr_yu.iso88595': 'sr_CS.ISO8859-5', - 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5', - 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251', - 'sr_yu.utf8': 'sr_RS.UTF-8', - 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8', - 'sr_yu@cyrillic': 'sr_RS.UTF-8', - 'ss': 'ss_ZA.ISO8859-1', - 'ss_za': 'ss_ZA.ISO8859-1', - 'ss_za.iso88591': 'ss_ZA.ISO8859-1', - 'st': 'st_ZA.ISO8859-1', - 'st_za': 'st_ZA.ISO8859-1', - 'st_za.iso88591': 'st_ZA.ISO8859-1', - 'sv': 'sv_SE.ISO8859-1', - 'sv.iso885915': 'sv_SE.ISO8859-15', - 'sv_fi': 'sv_FI.ISO8859-1', - 'sv_fi.iso88591': 'sv_FI.ISO8859-1', - 'sv_fi.iso885915': 'sv_FI.ISO8859-15', - 'sv_fi.iso885915@euro': 'sv_FI.ISO8859-15', - 'sv_fi.utf8@euro': 'sv_FI.UTF-8', - 'sv_fi@euro': 'sv_FI.ISO8859-15', - 'sv_se': 'sv_SE.ISO8859-1', - 'sv_se.88591': 'sv_SE.ISO8859-1', - 'sv_se.iso88591': 'sv_SE.ISO8859-1', - 'sv_se.iso885915': 'sv_SE.ISO8859-15', - 'sv_se@euro': 'sv_SE.ISO8859-15', - 'sw_ke': 'sw_KE.UTF-8', - 'sw_tz': 'sw_TZ.UTF-8', - 'swedish': 'sv_SE.ISO8859-1', - 'swedish.iso88591': 'sv_SE.ISO8859-1', - 'szl_pl': 'szl_PL.UTF-8', - 'ta': 'ta_IN.TSCII-0', - 'ta_in': 'ta_IN.TSCII-0', - 'ta_in.tscii': 'ta_IN.TSCII-0', - 'ta_in.tscii0': 'ta_IN.TSCII-0', - 'ta_lk': 'ta_LK.UTF-8', - 'te': 'te_IN.UTF-8', - 'te_in': 'te_IN.UTF-8', - 'tg': 'tg_TJ.KOI8-C', - 'tg_tj': 'tg_TJ.KOI8-C', - 'tg_tj.koi8c': 'tg_TJ.KOI8-C', - 'th': 'th_TH.ISO8859-11', - 'th_th': 'th_TH.ISO8859-11', - 'th_th.iso885911': 'th_TH.ISO8859-11', - 'th_th.tactis': 'th_TH.TIS620', - 'th_th.tis620': 'th_TH.TIS620', - 'thai': 'th_TH.ISO8859-11', - 'ti_er': 'ti_ER.UTF-8', - 'ti_et': 'ti_ET.UTF-8', - 'tig_er': 'tig_ER.UTF-8', - 'tk_tm': 'tk_TM.UTF-8', - 'tl': 
'tl_PH.ISO8859-1', - 'tl_ph': 'tl_PH.ISO8859-1', - 'tl_ph.iso88591': 'tl_PH.ISO8859-1', - 'tn': 'tn_ZA.ISO8859-15', - 'tn_za': 'tn_ZA.ISO8859-15', - 'tn_za.iso885915': 'tn_ZA.ISO8859-15', - 'tr': 'tr_TR.ISO8859-9', - 'tr_cy': 'tr_CY.ISO8859-9', - 'tr_tr': 'tr_TR.ISO8859-9', - 'tr_tr.iso88599': 'tr_TR.ISO8859-9', - 'ts': 'ts_ZA.ISO8859-1', - 'ts_za': 'ts_ZA.ISO8859-1', - 'ts_za.iso88591': 'ts_ZA.ISO8859-1', - 'tt': 'tt_RU.TATAR-CYR', - 'tt_ru': 'tt_RU.TATAR-CYR', - 'tt_ru.koi8c': 'tt_RU.KOI8-C', - 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR', - 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif', - 'turkish': 'tr_TR.ISO8859-9', - 'turkish.iso88599': 'tr_TR.ISO8859-9', - 'ug_cn': 'ug_CN.UTF-8', - 'uk': 'uk_UA.KOI8-U', - 'uk_ua': 'uk_UA.KOI8-U', - 'uk_ua.cp1251': 'uk_UA.CP1251', - 'uk_ua.iso88595': 'uk_UA.ISO8859-5', - 'uk_ua.koi8u': 'uk_UA.KOI8-U', - 'uk_ua.microsoftcp1251': 'uk_UA.CP1251', - 'univ': 'en_US.utf', - 'universal': 'en_US.utf', - 'universal.utf8@ucs4': 'en_US.UTF-8', - 'unm_us': 'unm_US.UTF-8', - 'ur': 'ur_PK.CP1256', - 'ur_in': 'ur_IN.UTF-8', - 'ur_pk': 'ur_PK.CP1256', - 'ur_pk.cp1256': 'ur_PK.CP1256', - 'ur_pk.microsoftcp1256': 'ur_PK.CP1256', - 'uz': 'uz_UZ.UTF-8', - 'uz_uz': 'uz_UZ.UTF-8', - 'uz_uz.iso88591': 'uz_UZ.ISO8859-1', - 'uz_uz.utf8@cyrillic': 'uz_UZ.UTF-8', - 'uz_uz@cyrillic': 'uz_UZ.UTF-8', - 've': 've_ZA.UTF-8', - 've_za': 've_ZA.UTF-8', - 'vi': 'vi_VN.TCVN', - 'vi_vn': 'vi_VN.TCVN', - 'vi_vn.tcvn': 'vi_VN.TCVN', - 'vi_vn.tcvn5712': 'vi_VN.TCVN', - 'vi_vn.viscii': 'vi_VN.VISCII', - 'vi_vn.viscii111': 'vi_VN.VISCII', - 'wa': 'wa_BE.ISO8859-1', - 'wa_be': 'wa_BE.ISO8859-1', - 'wa_be.iso88591': 'wa_BE.ISO8859-1', - 'wa_be.iso885915': 'wa_BE.ISO8859-15', - 'wa_be.iso885915@euro': 'wa_BE.ISO8859-15', - 'wa_be@euro': 'wa_BE.ISO8859-15', - 'wae_ch': 'wae_CH.UTF-8', - 'wal_et': 'wal_ET.UTF-8', - 'wo_sn': 'wo_SN.UTF-8', - 'xh': 'xh_ZA.ISO8859-1', - 'xh_za': 'xh_ZA.ISO8859-1', - 'xh_za.iso88591': 'xh_ZA.ISO8859-1', - 'yi': 'yi_US.CP1255', - 'yi_us': 'yi_US.CP1255', - 
'yi_us.cp1255': 'yi_US.CP1255', - 'yi_us.microsoftcp1255': 'yi_US.CP1255', - 'yo_ng': 'yo_NG.UTF-8', - 'yue_hk': 'yue_HK.UTF-8', - 'zh': 'zh_CN.eucCN', - 'zh_cn': 'zh_CN.gb2312', - 'zh_cn.big5': 'zh_TW.big5', - 'zh_cn.euc': 'zh_CN.eucCN', - 'zh_cn.gb18030': 'zh_CN.gb18030', - 'zh_cn.gb2312': 'zh_CN.gb2312', - 'zh_cn.gbk': 'zh_CN.gbk', - 'zh_hk': 'zh_HK.big5hkscs', - 'zh_hk.big5': 'zh_HK.big5', - 'zh_hk.big5hk': 'zh_HK.big5hkscs', - 'zh_hk.big5hkscs': 'zh_HK.big5hkscs', - 'zh_sg': 'zh_SG.GB2312', - 'zh_sg.gbk': 'zh_SG.GBK', - 'zh_tw': 'zh_TW.big5', - 'zh_tw.big5': 'zh_TW.big5', - 'zh_tw.euc': 'zh_TW.eucTW', - 'zh_tw.euctw': 'zh_TW.eucTW', - 'zu': 'zu_ZA.ISO8859-1', - 'zu_za': 'zu_ZA.ISO8859-1', - 'zu_za.iso88591': 'zu_ZA.ISO8859-1', -} - -# -# This maps Windows language identifiers to locale strings. -# -# This list has been updated from -# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp -# to include every locale up to Windows Vista. -# -# NOTE: this mapping is incomplete. If your language is missing, please -# submit a bug report to the Python bug tracker at http://bugs.python.org/ -# Make sure you include the missing language identifier and the suggested -# locale code. 
-# - -windows_locale = { - 0x0436: "af_ZA", # Afrikaans - 0x041c: "sq_AL", # Albanian - 0x0484: "gsw_FR",# Alsatian - France - 0x045e: "am_ET", # Amharic - Ethiopia - 0x0401: "ar_SA", # Arabic - Saudi Arabia - 0x0801: "ar_IQ", # Arabic - Iraq - 0x0c01: "ar_EG", # Arabic - Egypt - 0x1001: "ar_LY", # Arabic - Libya - 0x1401: "ar_DZ", # Arabic - Algeria - 0x1801: "ar_MA", # Arabic - Morocco - 0x1c01: "ar_TN", # Arabic - Tunisia - 0x2001: "ar_OM", # Arabic - Oman - 0x2401: "ar_YE", # Arabic - Yemen - 0x2801: "ar_SY", # Arabic - Syria - 0x2c01: "ar_JO", # Arabic - Jordan - 0x3001: "ar_LB", # Arabic - Lebanon - 0x3401: "ar_KW", # Arabic - Kuwait - 0x3801: "ar_AE", # Arabic - United Arab Emirates - 0x3c01: "ar_BH", # Arabic - Bahrain - 0x4001: "ar_QA", # Arabic - Qatar - 0x042b: "hy_AM", # Armenian - 0x044d: "as_IN", # Assamese - India - 0x042c: "az_AZ", # Azeri - Latin - 0x082c: "az_AZ", # Azeri - Cyrillic - 0x046d: "ba_RU", # Bashkir - 0x042d: "eu_ES", # Basque - Russia - 0x0423: "be_BY", # Belarusian - 0x0445: "bn_IN", # Begali - 0x201a: "bs_BA", # Bosnian - Cyrillic - 0x141a: "bs_BA", # Bosnian - Latin - 0x047e: "br_FR", # Breton - France - 0x0402: "bg_BG", # Bulgarian -# 0x0455: "my_MM", # Burmese - Not supported - 0x0403: "ca_ES", # Catalan - 0x0004: "zh_CHS",# Chinese - Simplified - 0x0404: "zh_TW", # Chinese - Taiwan - 0x0804: "zh_CN", # Chinese - PRC - 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R. - 0x1004: "zh_SG", # Chinese - Singapore - 0x1404: "zh_MO", # Chinese - Macao S.A.R. 
- 0x7c04: "zh_CHT",# Chinese - Traditional - 0x0483: "co_FR", # Corsican - France - 0x041a: "hr_HR", # Croatian - 0x101a: "hr_BA", # Croatian - Bosnia - 0x0405: "cs_CZ", # Czech - 0x0406: "da_DK", # Danish - 0x048c: "gbz_AF",# Dari - Afghanistan - 0x0465: "div_MV",# Divehi - Maldives - 0x0413: "nl_NL", # Dutch - The Netherlands - 0x0813: "nl_BE", # Dutch - Belgium - 0x0409: "en_US", # English - United States - 0x0809: "en_GB", # English - United Kingdom - 0x0c09: "en_AU", # English - Australia - 0x1009: "en_CA", # English - Canada - 0x1409: "en_NZ", # English - New Zealand - 0x1809: "en_IE", # English - Ireland - 0x1c09: "en_ZA", # English - South Africa - 0x2009: "en_JA", # English - Jamaica - 0x2409: "en_CB", # English - Caribbean - 0x2809: "en_BZ", # English - Belize - 0x2c09: "en_TT", # English - Trinidad - 0x3009: "en_ZW", # English - Zimbabwe - 0x3409: "en_PH", # English - Philippines - 0x4009: "en_IN", # English - India - 0x4409: "en_MY", # English - Malaysia - 0x4809: "en_IN", # English - Singapore - 0x0425: "et_EE", # Estonian - 0x0438: "fo_FO", # Faroese - 0x0464: "fil_PH",# Filipino - 0x040b: "fi_FI", # Finnish - 0x040c: "fr_FR", # French - France - 0x080c: "fr_BE", # French - Belgium - 0x0c0c: "fr_CA", # French - Canada - 0x100c: "fr_CH", # French - Switzerland - 0x140c: "fr_LU", # French - Luxembourg - 0x180c: "fr_MC", # French - Monaco - 0x0462: "fy_NL", # Frisian - Netherlands - 0x0456: "gl_ES", # Galician - 0x0437: "ka_GE", # Georgian - 0x0407: "de_DE", # German - Germany - 0x0807: "de_CH", # German - Switzerland - 0x0c07: "de_AT", # German - Austria - 0x1007: "de_LU", # German - Luxembourg - 0x1407: "de_LI", # German - Liechtenstein - 0x0408: "el_GR", # Greek - 0x046f: "kl_GL", # Greenlandic - Greenland - 0x0447: "gu_IN", # Gujarati - 0x0468: "ha_NG", # Hausa - Latin - 0x040d: "he_IL", # Hebrew - 0x0439: "hi_IN", # Hindi - 0x040e: "hu_HU", # Hungarian - 0x040f: "is_IS", # Icelandic - 0x0421: "id_ID", # Indonesian - 0x045d: "iu_CA", # Inuktitut - 
Syllabics - 0x085d: "iu_CA", # Inuktitut - Latin - 0x083c: "ga_IE", # Irish - Ireland - 0x0410: "it_IT", # Italian - Italy - 0x0810: "it_CH", # Italian - Switzerland - 0x0411: "ja_JP", # Japanese - 0x044b: "kn_IN", # Kannada - India - 0x043f: "kk_KZ", # Kazakh - 0x0453: "kh_KH", # Khmer - Cambodia - 0x0486: "qut_GT",# K'iche - Guatemala - 0x0487: "rw_RW", # Kinyarwanda - Rwanda - 0x0457: "kok_IN",# Konkani - 0x0412: "ko_KR", # Korean - 0x0440: "ky_KG", # Kyrgyz - 0x0454: "lo_LA", # Lao - Lao PDR - 0x0426: "lv_LV", # Latvian - 0x0427: "lt_LT", # Lithuanian - 0x082e: "dsb_DE",# Lower Sorbian - Germany - 0x046e: "lb_LU", # Luxembourgish - 0x042f: "mk_MK", # FYROM Macedonian - 0x043e: "ms_MY", # Malay - Malaysia - 0x083e: "ms_BN", # Malay - Brunei Darussalam - 0x044c: "ml_IN", # Malayalam - India - 0x043a: "mt_MT", # Maltese - 0x0481: "mi_NZ", # Maori - 0x047a: "arn_CL",# Mapudungun - 0x044e: "mr_IN", # Marathi - 0x047c: "moh_CA",# Mohawk - Canada - 0x0450: "mn_MN", # Mongolian - Cyrillic - 0x0850: "mn_CN", # Mongolian - PRC - 0x0461: "ne_NP", # Nepali - 0x0414: "nb_NO", # Norwegian - Bokmal - 0x0814: "nn_NO", # Norwegian - Nynorsk - 0x0482: "oc_FR", # Occitan - France - 0x0448: "or_IN", # Oriya - India - 0x0463: "ps_AF", # Pashto - Afghanistan - 0x0429: "fa_IR", # Persian - 0x0415: "pl_PL", # Polish - 0x0416: "pt_BR", # Portuguese - Brazil - 0x0816: "pt_PT", # Portuguese - Portugal - 0x0446: "pa_IN", # Punjabi - 0x046b: "quz_BO",# Quechua (Bolivia) - 0x086b: "quz_EC",# Quechua (Ecuador) - 0x0c6b: "quz_PE",# Quechua (Peru) - 0x0418: "ro_RO", # Romanian - Romania - 0x0417: "rm_CH", # Romansh - 0x0419: "ru_RU", # Russian - 0x243b: "smn_FI",# Sami Finland - 0x103b: "smj_NO",# Sami Norway - 0x143b: "smj_SE",# Sami Sweden - 0x043b: "se_NO", # Sami Northern Norway - 0x083b: "se_SE", # Sami Northern Sweden - 0x0c3b: "se_FI", # Sami Northern Finland - 0x203b: "sms_FI",# Sami Skolt - 0x183b: "sma_NO",# Sami Southern Norway - 0x1c3b: "sma_SE",# Sami Southern Sweden - 0x044f: 
"sa_IN", # Sanskrit - 0x0c1a: "sr_SP", # Serbian - Cyrillic - 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic - 0x081a: "sr_SP", # Serbian - Latin - 0x181a: "sr_BA", # Serbian - Bosnia Latin - 0x045b: "si_LK", # Sinhala - Sri Lanka - 0x046c: "ns_ZA", # Northern Sotho - 0x0432: "tn_ZA", # Setswana - Southern Africa - 0x041b: "sk_SK", # Slovak - 0x0424: "sl_SI", # Slovenian - 0x040a: "es_ES", # Spanish - Spain - 0x080a: "es_MX", # Spanish - Mexico - 0x0c0a: "es_ES", # Spanish - Spain (Modern) - 0x100a: "es_GT", # Spanish - Guatemala - 0x140a: "es_CR", # Spanish - Costa Rica - 0x180a: "es_PA", # Spanish - Panama - 0x1c0a: "es_DO", # Spanish - Dominican Republic - 0x200a: "es_VE", # Spanish - Venezuela - 0x240a: "es_CO", # Spanish - Colombia - 0x280a: "es_PE", # Spanish - Peru - 0x2c0a: "es_AR", # Spanish - Argentina - 0x300a: "es_EC", # Spanish - Ecuador - 0x340a: "es_CL", # Spanish - Chile - 0x380a: "es_UR", # Spanish - Uruguay - 0x3c0a: "es_PY", # Spanish - Paraguay - 0x400a: "es_BO", # Spanish - Bolivia - 0x440a: "es_SV", # Spanish - El Salvador - 0x480a: "es_HN", # Spanish - Honduras - 0x4c0a: "es_NI", # Spanish - Nicaragua - 0x500a: "es_PR", # Spanish - Puerto Rico - 0x540a: "es_US", # Spanish - United States -# 0x0430: "", # Sutu - Not supported - 0x0441: "sw_KE", # Swahili - 0x041d: "sv_SE", # Swedish - Sweden - 0x081d: "sv_FI", # Swedish - Finland - 0x045a: "syr_SY",# Syriac - 0x0428: "tg_TJ", # Tajik - Cyrillic - 0x085f: "tmz_DZ",# Tamazight - Latin - 0x0449: "ta_IN", # Tamil - 0x0444: "tt_RU", # Tatar - 0x044a: "te_IN", # Telugu - 0x041e: "th_TH", # Thai - 0x0851: "bo_BT", # Tibetan - Bhutan - 0x0451: "bo_CN", # Tibetan - PRC - 0x041f: "tr_TR", # Turkish - 0x0442: "tk_TM", # Turkmen - Cyrillic - 0x0480: "ug_CN", # Uighur - Arabic - 0x0422: "uk_UA", # Ukrainian - 0x042e: "wen_DE",# Upper Sorbian - Germany - 0x0420: "ur_PK", # Urdu - 0x0820: "ur_IN", # Urdu - India - 0x0443: "uz_UZ", # Uzbek - Latin - 0x0843: "uz_UZ", # Uzbek - Cyrillic - 0x042a: "vi_VN", # 
Vietnamese - 0x0452: "cy_GB", # Welsh - 0x0488: "wo_SN", # Wolof - Senegal - 0x0434: "xh_ZA", # Xhosa - South Africa - 0x0485: "sah_RU",# Yakut - Cyrillic - 0x0478: "ii_CN", # Yi - PRC - 0x046a: "yo_NG", # Yoruba - Nigeria - 0x0435: "zu_ZA", # Zulu -} - -def _print_locale(): - - """ Test function. - """ - categories = {} - def _init_categories(categories=categories): - for k,v in globals().items(): - if k[:3] == 'LC_': - categories[k] = v - _init_categories() - del categories['LC_ALL'] - - print 'Locale defaults as determined by getdefaultlocale():' - print '-'*72 - lang, enc = getdefaultlocale() - print 'Language: ', lang or '(undefined)' - print 'Encoding: ', enc or '(undefined)' - print - - print 'Locale settings on startup:' - print '-'*72 - for name,category in categories.items(): - print name, '...' - lang, enc = getlocale(category) - print ' Language: ', lang or '(undefined)' - print ' Encoding: ', enc or '(undefined)' - print - - print - print 'Locale settings after calling resetlocale():' - print '-'*72 - resetlocale() - for name,category in categories.items(): - print name, '...' - lang, enc = getlocale(category) - print ' Language: ', lang or '(undefined)' - print ' Encoding: ', enc or '(undefined)' - print - - try: - setlocale(LC_ALL, "") - except: - print 'NOTE:' - print 'setlocale(LC_ALL, "") does not support the default locale' - print 'given in the OS environment variables.' - else: - print - print 'Locale settings after calling setlocale(LC_ALL, ""):' - print '-'*72 - for name,category in categories.items(): - print name, '...' 
- lang, enc = getlocale(category) - print ' Language: ', lang or '(undefined)' - print ' Encoding: ', enc or '(undefined)' - print - -### - -try: - LC_MESSAGES -except NameError: - pass -else: - __all__.append("LC_MESSAGES") - -if __name__=='__main__': - print 'Locale aliasing:' - print - _print_locale() - print - print 'Number formatting:' - print - _test() diff --git a/python/Lib/logging/__init__.py b/python/Lib/logging/__init__.py deleted file mode 100755 index caf151d153..0000000000 --- a/python/Lib/logging/__init__.py +++ /dev/null @@ -1,1744 +0,0 @@ -# Copyright 2001-2014 by Vinay Sajip. All Rights Reserved. -# -# Permission to use, copy, modify, and distribute this software and its -# documentation for any purpose and without fee is hereby granted, -# provided that the above copyright notice appear in all copies and that -# both that copyright notice and this permission notice appear in -# supporting documentation, and that the name of Vinay Sajip -# not be used in advertising or publicity pertaining to distribution -# of the software without specific, written prior permission. -# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL -# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR -# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -""" -Logging package for Python. Based on PEP 282 and comments thereto in -comp.lang.python. - -Copyright (C) 2001-2014 Vinay Sajip. All Rights Reserved. - -To use, simply 'import logging' and log away! 
-""" - -import sys, os, time, cStringIO, traceback, warnings, weakref, collections - -__all__ = ['BASIC_FORMAT', 'BufferingFormatter', 'CRITICAL', 'DEBUG', 'ERROR', - 'FATAL', 'FileHandler', 'Filter', 'Formatter', 'Handler', 'INFO', - 'LogRecord', 'Logger', 'LoggerAdapter', 'NOTSET', 'NullHandler', - 'StreamHandler', 'WARN', 'WARNING', 'addLevelName', 'basicConfig', - 'captureWarnings', 'critical', 'debug', 'disable', 'error', - 'exception', 'fatal', 'getLevelName', 'getLogger', 'getLoggerClass', - 'info', 'log', 'makeLogRecord', 'setLoggerClass', 'warn', 'warning'] - -try: - import codecs -except ImportError: - codecs = None - -try: - import thread - import threading -except ImportError: - thread = None - -__author__ = "Vinay Sajip " -__status__ = "production" -# Note: the attributes below are no longer maintained. -__version__ = "0.5.1.2" -__date__ = "07 February 2010" - -#--------------------------------------------------------------------------- -# Miscellaneous module data -#--------------------------------------------------------------------------- -try: - unicode - _unicode = True -except NameError: - _unicode = False - -# next bit filched from 1.5.2's inspect.py -def currentframe(): - """Return the frame object for the caller's stack frame.""" - try: - raise Exception - except: - return sys.exc_info()[2].tb_frame.f_back - -if hasattr(sys, '_getframe'): currentframe = lambda: sys._getframe(3) -# done filching - -# -# _srcfile is used when walking the stack to check when we've got the first -# caller stack frame. -# -_srcfile = os.path.normcase(currentframe.__code__.co_filename) - -# _srcfile is only used in conjunction with sys._getframe(). -# To provide compatibility with older versions of Python, set _srcfile -# to None if _getframe() is not available; this value will prevent -# findCaller() from being called. 
-#if not hasattr(sys, "_getframe"): -# _srcfile = None - -# -#_startTime is used as the base when calculating the relative time of events -# -_startTime = time.time() - -# -#raiseExceptions is used to see if exceptions during handling should be -#propagated -# -raiseExceptions = 1 - -# -# If you don't want threading information in the log, set this to zero -# -logThreads = 1 - -# -# If you don't want multiprocessing information in the log, set this to zero -# -logMultiprocessing = 1 - -# -# If you don't want process information in the log, set this to zero -# -logProcesses = 1 - -#--------------------------------------------------------------------------- -# Level related stuff -#--------------------------------------------------------------------------- -# -# Default levels and level names, these can be replaced with any positive set -# of values having corresponding names. There is a pseudo-level, NOTSET, which -# is only really there as a lower limit for user-defined levels. Handlers and -# loggers are initialized with NOTSET so that they will log all messages, even -# at user-defined levels. -# - -CRITICAL = 50 -FATAL = CRITICAL -ERROR = 40 -WARNING = 30 -WARN = WARNING -INFO = 20 -DEBUG = 10 -NOTSET = 0 - -_levelNames = { - CRITICAL : 'CRITICAL', - ERROR : 'ERROR', - WARNING : 'WARNING', - INFO : 'INFO', - DEBUG : 'DEBUG', - NOTSET : 'NOTSET', - 'CRITICAL' : CRITICAL, - 'ERROR' : ERROR, - 'WARN' : WARNING, - 'WARNING' : WARNING, - 'INFO' : INFO, - 'DEBUG' : DEBUG, - 'NOTSET' : NOTSET, -} - -def getLevelName(level): - """ - Return the textual representation of logging level 'level'. - - If the level is one of the predefined levels (CRITICAL, ERROR, WARNING, - INFO, DEBUG) then you get the corresponding string. If you have - associated levels with names using addLevelName then the name you have - associated with 'level' is returned. 
- - If a numeric value corresponding to one of the defined levels is passed - in, the corresponding string representation is returned. - - Otherwise, the string "Level %s" % level is returned. - """ - return _levelNames.get(level, ("Level %s" % level)) - -def addLevelName(level, levelName): - """ - Associate 'levelName' with 'level'. - - This is used when converting levels to text during message formatting. - """ - _acquireLock() - try: #unlikely to cause an exception, but you never know... - _levelNames[level] = levelName - _levelNames[levelName] = level - finally: - _releaseLock() - -def _checkLevel(level): - if isinstance(level, (int, long)): - rv = level - elif str(level) == level: - if level not in _levelNames: - raise ValueError("Unknown level: %r" % level) - rv = _levelNames[level] - else: - raise TypeError("Level not an integer or a valid string: %r" % level) - return rv - -#--------------------------------------------------------------------------- -# Thread-related stuff -#--------------------------------------------------------------------------- - -# -#_lock is used to serialize access to shared data structures in this module. -#This needs to be an RLock because fileConfig() creates and configures -#Handlers, and so might arbitrary user threads. Since Handler code updates the -#shared dictionary _handlers, it needs to acquire the lock. But if configuring, -#the lock would already have been acquired - so we need an RLock. -#The same argument applies to Loggers and Manager.loggerDict. -# -if thread: - _lock = threading.RLock() -else: - _lock = None - -def _acquireLock(): - """ - Acquire the module-level lock for serializing access to shared data. - - This should be released with _releaseLock(). - """ - if _lock: - _lock.acquire() - -def _releaseLock(): - """ - Release the module-level lock acquired by calling _acquireLock(). 
- """ - if _lock: - _lock.release() - -#--------------------------------------------------------------------------- -# The logging record -#--------------------------------------------------------------------------- - -class LogRecord(object): - """ - A LogRecord instance represents an event being logged. - - LogRecord instances are created every time something is logged. They - contain all the information pertinent to the event being logged. The - main information passed in is in msg and args, which are combined - using str(msg) % args to create the message field of the record. The - record also includes information such as when the record was created, - the source line where the logging call was made, and any exception - information to be logged. - """ - def __init__(self, name, level, pathname, lineno, - msg, args, exc_info, func=None): - """ - Initialize a logging record with interesting information. - """ - ct = time.time() - self.name = name - self.msg = msg - # - # The following statement allows passing of a dictionary as a sole - # argument, so that you can do something like - # logging.debug("a %(a)d b %(b)s", {'a':1, 'b':2}) - # Suggested by Stefan Behnel. - # Note that without the test for args[0], we get a problem because - # during formatting, we test to see if the arg is present using - # 'if self.args:'. If the event being logged is e.g. 'Value is %d' - # and if the passed arg fails 'if self.args:' then no formatting - # is done. For example, logger.warn('Value is %d', 0) would log - # 'Value is %d' instead of 'Value is 0'. - # For the use case of passing a dictionary, this should not be a - # problem. - # Issue #21172: a request was made to relax the isinstance check - # to hasattr(args[0], '__getitem__'). However, the docs on string - # formatting still seem to suggest a mapping object is required. - # Thus, while not removing the isinstance check, it does now look - # for collections.Mapping rather than, as before, dict. 
- if (args and len(args) == 1 and isinstance(args[0], collections.Mapping) - and args[0]): - args = args[0] - self.args = args - self.levelname = getLevelName(level) - self.levelno = level - self.pathname = pathname - try: - self.filename = os.path.basename(pathname) - self.module = os.path.splitext(self.filename)[0] - except (TypeError, ValueError, AttributeError): - self.filename = pathname - self.module = "Unknown module" - self.exc_info = exc_info - self.exc_text = None # used to cache the traceback text - self.lineno = lineno - self.funcName = func - self.created = ct - self.msecs = (ct - long(ct)) * 1000 - self.relativeCreated = (self.created - _startTime) * 1000 - if logThreads and thread: - self.thread = thread.get_ident() - self.threadName = threading.current_thread().name - else: - self.thread = None - self.threadName = None - if not logMultiprocessing: - self.processName = None - else: - self.processName = 'MainProcess' - mp = sys.modules.get('multiprocessing') - if mp is not None: - # Errors may occur if multiprocessing has not finished loading - # yet - e.g. if a custom import hook causes third-party code - # to run when multiprocessing calls import. See issue 8200 - # for an example - try: - self.processName = mp.current_process().name - except StandardError: - pass - if logProcesses and hasattr(os, 'getpid'): - self.process = os.getpid() - else: - self.process = None - - def __str__(self): - return ''%(self.name, self.levelno, - self.pathname, self.lineno, self.msg) - - def getMessage(self): - """ - Return the message for this LogRecord. - - Return the message for this LogRecord after merging any user-supplied - arguments with the message. - """ - if not _unicode: #if no unicode support... 
- msg = str(self.msg) - else: - msg = self.msg - if not isinstance(msg, basestring): - try: - msg = str(self.msg) - except UnicodeError: - msg = self.msg #Defer encoding till later - if self.args: - msg = msg % self.args - return msg - -def makeLogRecord(dict): - """ - Make a LogRecord whose attributes are defined by the specified dictionary, - This function is useful for converting a logging event received over - a socket connection (which is sent as a dictionary) into a LogRecord - instance. - """ - rv = LogRecord(None, None, "", 0, "", (), None, None) - rv.__dict__.update(dict) - return rv - -#--------------------------------------------------------------------------- -# Formatter classes and functions -#--------------------------------------------------------------------------- - -class Formatter(object): - """ - Formatter instances are used to convert a LogRecord to text. - - Formatters need to know how a LogRecord is constructed. They are - responsible for converting a LogRecord to (usually) a string which can - be interpreted by either a human or an external system. The base Formatter - allows a formatting string to be specified. If none is supplied, the - default value of "%s(message)\\n" is used. - - The Formatter can be initialized with a format string which makes use of - knowledge of the LogRecord attributes - e.g. the default value mentioned - above makes use of the fact that the user's message and arguments are pre- - formatted into a LogRecord's message attribute. 
Currently, the useful - attributes in a LogRecord are described by: - - %(name)s Name of the logger (logging channel) - %(levelno)s Numeric logging level for the message (DEBUG, INFO, - WARNING, ERROR, CRITICAL) - %(levelname)s Text logging level for the message ("DEBUG", "INFO", - "WARNING", "ERROR", "CRITICAL") - %(pathname)s Full pathname of the source file where the logging - call was issued (if available) - %(filename)s Filename portion of pathname - %(module)s Module (name portion of filename) - %(lineno)d Source line number where the logging call was issued - (if available) - %(funcName)s Function name - %(created)f Time when the LogRecord was created (time.time() - return value) - %(asctime)s Textual time when the LogRecord was created - %(msecs)d Millisecond portion of the creation time - %(relativeCreated)d Time in milliseconds when the LogRecord was created, - relative to the time the logging module was loaded - (typically at application startup time) - %(thread)d Thread ID (if available) - %(threadName)s Thread name (if available) - %(process)d Process ID (if available) - %(message)s The result of record.getMessage(), computed just as - the record is emitted - """ - - converter = time.localtime - - def __init__(self, fmt=None, datefmt=None): - """ - Initialize the formatter with specified format strings. - - Initialize the formatter either with the specified format string, or a - default as described above. Allow for specialized date formatting with - the optional datefmt argument (if omitted, you get the ISO8601 format). - """ - if fmt: - self._fmt = fmt - else: - self._fmt = "%(message)s" - self.datefmt = datefmt - - def formatTime(self, record, datefmt=None): - """ - Return the creation time of the specified LogRecord as formatted text. - - This method should be called from format() by a formatter which - wants to make use of a formatted time. 
This method can be overridden - in formatters to provide for any specific requirement, but the - basic behaviour is as follows: if datefmt (a string) is specified, - it is used with time.strftime() to format the creation time of the - record. Otherwise, the ISO8601 format is used. The resulting - string is returned. This function uses a user-configurable function - to convert the creation time to a tuple. By default, time.localtime() - is used; to change this for a particular formatter instance, set the - 'converter' attribute to a function with the same signature as - time.localtime() or time.gmtime(). To change it for all formatters, - for example if you want all logging times to be shown in GMT, - set the 'converter' attribute in the Formatter class. - """ - ct = self.converter(record.created) - if datefmt: - s = time.strftime(datefmt, ct) - else: - t = time.strftime("%Y-%m-%d %H:%M:%S", ct) - s = "%s,%03d" % (t, record.msecs) - return s - - def formatException(self, ei): - """ - Format and return the specified exception information as a string. - - This default implementation just uses - traceback.print_exception() - """ - sio = cStringIO.StringIO() - traceback.print_exception(ei[0], ei[1], ei[2], None, sio) - s = sio.getvalue() - sio.close() - if s[-1:] == "\n": - s = s[:-1] - return s - - def usesTime(self): - """ - Check if the format uses the creation time of the record. - """ - return self._fmt.find("%(asctime)") >= 0 - - def format(self, record): - """ - Format the specified record as text. - - The record's attribute dictionary is used as the operand to a - string formatting operation which yields the returned string. - Before formatting the dictionary, a couple of preparatory steps - are carried out. The message attribute of the record is computed - using LogRecord.getMessage(). If the formatting string uses the - time (as determined by a call to usesTime(), formatTime() is - called to format the event time. 
If there is exception information, - it is formatted using formatException() and appended to the message. - """ - record.message = record.getMessage() - if self.usesTime(): - record.asctime = self.formatTime(record, self.datefmt) - try: - s = self._fmt % record.__dict__ - except UnicodeDecodeError as e: - # Issue 25664. The logger name may be Unicode. Try again ... - try: - record.name = record.name.decode('utf-8') - s = self._fmt % record.__dict__ - except UnicodeDecodeError: - raise e - if record.exc_info: - # Cache the traceback text to avoid converting it multiple times - # (it's constant anyway) - if not record.exc_text: - record.exc_text = self.formatException(record.exc_info) - if record.exc_text: - if s[-1:] != "\n": - s = s + "\n" - try: - s = s + record.exc_text - except UnicodeError: - # Sometimes filenames have non-ASCII chars, which can lead - # to errors when s is Unicode and record.exc_text is str - # See issue 8924. - # We also use replace for when there are multiple - # encodings, e.g. UTF-8 for the filesystem and latin-1 - # for a script. See issue 13232. - s = s + record.exc_text.decode(sys.getfilesystemencoding(), - 'replace') - return s - -# -# The default formatter to use when no other is specified -# -_defaultFormatter = Formatter() - -class BufferingFormatter(object): - """ - A formatter suitable for formatting a number of records. - """ - def __init__(self, linefmt=None): - """ - Optionally specify a formatter which will be used to format each - individual record. - """ - if linefmt: - self.linefmt = linefmt - else: - self.linefmt = _defaultFormatter - - def formatHeader(self, records): - """ - Return the header string for the specified records. - """ - return "" - - def formatFooter(self, records): - """ - Return the footer string for the specified records. - """ - return "" - - def format(self, records): - """ - Format the specified records and return the result as a string. 
- """ - rv = "" - if len(records) > 0: - rv = rv + self.formatHeader(records) - for record in records: - rv = rv + self.linefmt.format(record) - rv = rv + self.formatFooter(records) - return rv - -#--------------------------------------------------------------------------- -# Filter classes and functions -#--------------------------------------------------------------------------- - -class Filter(object): - """ - Filter instances are used to perform arbitrary filtering of LogRecords. - - Loggers and Handlers can optionally use Filter instances to filter - records as desired. The base filter class only allows events which are - below a certain point in the logger hierarchy. For example, a filter - initialized with "A.B" will allow events logged by loggers "A.B", - "A.B.C", "A.B.C.D", "A.B.D" etc. but not "A.BB", "B.A.B" etc. If - initialized with the empty string, all events are passed. - """ - def __init__(self, name=''): - """ - Initialize a filter. - - Initialize with the name of the logger which, together with its - children, will have its events allowed through the filter. If no - name is specified, allow every event. - """ - self.name = name - self.nlen = len(name) - - def filter(self, record): - """ - Determine if the specified record is to be logged. - - Is the specified record to be logged? Returns 0 for no, nonzero for - yes. If deemed appropriate, the record may be modified in-place. - """ - if self.nlen == 0: - return 1 - elif self.name == record.name: - return 1 - elif record.name.find(self.name, 0, self.nlen) != 0: - return 0 - return (record.name[self.nlen] == ".") - -class Filterer(object): - """ - A base class for loggers and handlers which allows them to share - common code. - """ - def __init__(self): - """ - Initialize the list of filters to be an empty list. - """ - self.filters = [] - - def addFilter(self, filter): - """ - Add the specified filter to this handler. 
- """ - if not (filter in self.filters): - self.filters.append(filter) - - def removeFilter(self, filter): - """ - Remove the specified filter from this handler. - """ - if filter in self.filters: - self.filters.remove(filter) - - def filter(self, record): - """ - Determine if a record is loggable by consulting all the filters. - - The default is to allow the record to be logged; any filter can veto - this and the record is then dropped. Returns a zero value if a record - is to be dropped, else non-zero. - """ - rv = 1 - for f in self.filters: - if not f.filter(record): - rv = 0 - break - return rv - -#--------------------------------------------------------------------------- -# Handler classes and functions -#--------------------------------------------------------------------------- - -_handlers = weakref.WeakValueDictionary() #map of handler names to handlers -_handlerList = [] # added to allow handlers to be removed in reverse of order initialized - -def _removeHandlerRef(wr): - """ - Remove a handler reference from the internal cleanup list. - """ - # This function can be called during module teardown, when globals are - # set to None. It can also be called from another thread. So we need to - # pre-emptively grab the necessary globals and check if they're None, - # to prevent race conditions and failures during interpreter shutdown. - acquire, release, handlers = _acquireLock, _releaseLock, _handlerList - if acquire and release and handlers: - acquire() - try: - if wr in handlers: - handlers.remove(wr) - finally: - release() - -def _addHandlerRef(handler): - """ - Add a handler to the internal cleanup list using a weak reference. - """ - _acquireLock() - try: - _handlerList.append(weakref.ref(handler, _removeHandlerRef)) - finally: - _releaseLock() - -class Handler(Filterer): - """ - Handler instances dispatch logging events to specific destinations. - - The base handler class. Acts as a placeholder which defines the Handler - interface. 
Handlers can optionally use Formatter instances to format - records as desired. By default, no formatter is specified; in this case, - the 'raw' message as determined by record.message is logged. - """ - def __init__(self, level=NOTSET): - """ - Initializes the instance - basically setting the formatter to None - and the filter list to empty. - """ - Filterer.__init__(self) - self._name = None - self.level = _checkLevel(level) - self.formatter = None - # Add the handler to the global _handlerList (for cleanup on shutdown) - _addHandlerRef(self) - self.createLock() - - def get_name(self): - return self._name - - def set_name(self, name): - _acquireLock() - try: - if self._name in _handlers: - del _handlers[self._name] - self._name = name - if name: - _handlers[name] = self - finally: - _releaseLock() - - name = property(get_name, set_name) - - def createLock(self): - """ - Acquire a thread lock for serializing access to the underlying I/O. - """ - if thread: - self.lock = threading.RLock() - else: - self.lock = None - - def acquire(self): - """ - Acquire the I/O thread lock. - """ - if self.lock: - self.lock.acquire() - - def release(self): - """ - Release the I/O thread lock. - """ - if self.lock: - self.lock.release() - - def setLevel(self, level): - """ - Set the logging level of this handler. - """ - self.level = _checkLevel(level) - - def format(self, record): - """ - Format the specified record. - - If a formatter is set, use it. Otherwise, use the default formatter - for the module. - """ - if self.formatter: - fmt = self.formatter - else: - fmt = _defaultFormatter - return fmt.format(record) - - def emit(self, record): - """ - Do whatever it takes to actually log the specified logging record. - - This version is intended to be implemented by subclasses and so - raises a NotImplementedError. 
- """ - raise NotImplementedError('emit must be implemented ' - 'by Handler subclasses') - - def handle(self, record): - """ - Conditionally emit the specified logging record. - - Emission depends on filters which may have been added to the handler. - Wrap the actual emission of the record with acquisition/release of - the I/O thread lock. Returns whether the filter passed the record for - emission. - """ - rv = self.filter(record) - if rv: - self.acquire() - try: - self.emit(record) - finally: - self.release() - return rv - - def setFormatter(self, fmt): - """ - Set the formatter for this handler. - """ - self.formatter = fmt - - def flush(self): - """ - Ensure all logging output has been flushed. - - This version does nothing and is intended to be implemented by - subclasses. - """ - pass - - def close(self): - """ - Tidy up any resources used by the handler. - - This version removes the handler from an internal map of handlers, - _handlers, which is used for handler lookup by name. Subclasses - should ensure that this gets called from overridden close() - methods. - """ - #get the module data lock, as we're updating a shared structure. - _acquireLock() - try: #unlikely to raise an exception, but you never know... - if self._name and self._name in _handlers: - del _handlers[self._name] - finally: - _releaseLock() - - def handleError(self, record): - """ - Handle errors which occur during an emit() call. - - This method should be called from handlers when an exception is - encountered during an emit() call. If raiseExceptions is false, - exceptions get silently ignored. This is what is mostly wanted - for a logging system - most users will not care about errors in - the logging system, they are more interested in application errors. - You could, however, replace this with a custom handler if you wish. - The record which was being processed is passed in to this method. 
- """ - if raiseExceptions and sys.stderr: # see issue 13807 - ei = sys.exc_info() - try: - traceback.print_exception(ei[0], ei[1], ei[2], - None, sys.stderr) - sys.stderr.write('Logged from file %s, line %s\n' % ( - record.filename, record.lineno)) - except IOError: - pass # see issue 5971 - finally: - del ei - -class StreamHandler(Handler): - """ - A handler class which writes logging records, appropriately formatted, - to a stream. Note that this class does not close the stream, as - sys.stdout or sys.stderr may be used. - """ - - def __init__(self, stream=None): - """ - Initialize the handler. - - If stream is not specified, sys.stderr is used. - """ - Handler.__init__(self) - if stream is None: - stream = sys.stderr - self.stream = stream - - def flush(self): - """ - Flushes the stream. - """ - self.acquire() - try: - if self.stream and hasattr(self.stream, "flush"): - self.stream.flush() - finally: - self.release() - - def emit(self, record): - """ - Emit a record. - - If a formatter is specified, it is used to format the record. - The record is then written to the stream with a trailing newline. If - exception information is present, it is formatted using - traceback.print_exception and appended to the stream. If the stream - has an 'encoding' attribute, it is used to determine how to do the - output to the stream. - """ - try: - msg = self.format(record) - stream = self.stream - fs = "%s\n" - if not _unicode: #if no unicode support... - stream.write(fs % msg) - else: - try: - if (isinstance(msg, unicode) and - getattr(stream, 'encoding', None)): - ufs = u'%s\n' - try: - stream.write(ufs % msg) - except UnicodeEncodeError: - #Printing to terminals sometimes fails. For example, - #with an encoding of 'cp1251', the above write will - #work if written to a stream opened or wrapped by - #the codecs module, but fail when writing to a - #terminal even when the codepage is set to cp1251. - #An extra encoding step seems to be needed. 
- stream.write((ufs % msg).encode(stream.encoding)) - else: - stream.write(fs % msg) - except UnicodeError: - stream.write(fs % msg.encode("UTF-8")) - self.flush() - except (KeyboardInterrupt, SystemExit): - raise - except: - self.handleError(record) - -class FileHandler(StreamHandler): - """ - A handler class which writes formatted logging records to disk files. - """ - def __init__(self, filename, mode='a', encoding=None, delay=0): - """ - Open the specified file and use it as the stream for logging. - """ - #keep the absolute path, otherwise derived classes which use this - #may come a cropper when the current directory changes - if codecs is None: - encoding = None - self.baseFilename = os.path.abspath(filename) - self.mode = mode - self.encoding = encoding - self.delay = delay - if delay: - #We don't open the stream, but we still need to call the - #Handler constructor to set level, formatter, lock etc. - Handler.__init__(self) - self.stream = None - else: - StreamHandler.__init__(self, self._open()) - - def close(self): - """ - Closes the stream. - """ - self.acquire() - try: - try: - if self.stream: - try: - self.flush() - finally: - stream = self.stream - self.stream = None - if hasattr(stream, "close"): - stream.close() - finally: - # Issue #19523: call unconditionally to - # prevent a handler leak when delay is set - StreamHandler.close(self) - finally: - self.release() - - def _open(self): - """ - Open the current base file with the (original) mode and encoding. - Return the resulting stream. - """ - if self.encoding is None: - stream = open(self.baseFilename, self.mode) - else: - stream = codecs.open(self.baseFilename, self.mode, self.encoding) - return stream - - def emit(self, record): - """ - Emit a record. - - If the stream was not opened because 'delay' was specified in the - constructor, open it before calling the superclass's emit. 
- """ - if self.stream is None: - self.stream = self._open() - StreamHandler.emit(self, record) - -#--------------------------------------------------------------------------- -# Manager classes and functions -#--------------------------------------------------------------------------- - -class PlaceHolder(object): - """ - PlaceHolder instances are used in the Manager logger hierarchy to take - the place of nodes for which no loggers have been defined. This class is - intended for internal use only and not as part of the public API. - """ - def __init__(self, alogger): - """ - Initialize with the specified logger being a child of this placeholder. - """ - #self.loggers = [alogger] - self.loggerMap = { alogger : None } - - def append(self, alogger): - """ - Add the specified logger as a child of this placeholder. - """ - #if alogger not in self.loggers: - if alogger not in self.loggerMap: - #self.loggers.append(alogger) - self.loggerMap[alogger] = None - -# -# Determine which class to use when instantiating loggers. -# -_loggerClass = None - -def setLoggerClass(klass): - """ - Set the class to be used when instantiating a logger. The class should - define __init__() such that only a name argument is required, and the - __init__() should call Logger.__init__() - """ - if klass != Logger: - if not issubclass(klass, Logger): - raise TypeError("logger not derived from logging.Logger: " - + klass.__name__) - global _loggerClass - _loggerClass = klass - -def getLoggerClass(): - """ - Return the class to be used when instantiating a logger. - """ - - return _loggerClass - -class Manager(object): - """ - There is [under normal circumstances] just one Manager instance, which - holds the hierarchy of loggers. - """ - def __init__(self, rootnode): - """ - Initialize the manager with the root node of the logger hierarchy. 
- """ - self.root = rootnode - self.disable = 0 - self.emittedNoHandlerWarning = 0 - self.loggerDict = {} - self.loggerClass = None - - def getLogger(self, name): - """ - Get a logger with the specified name (channel name), creating it - if it doesn't yet exist. This name is a dot-separated hierarchical - name, such as "a", "a.b", "a.b.c" or similar. - - If a PlaceHolder existed for the specified name [i.e. the logger - didn't exist but a child of it did], replace it with the created - logger and fix up the parent/child references which pointed to the - placeholder to now point to the logger. - """ - rv = None - if not isinstance(name, basestring): - raise TypeError('A logger name must be string or Unicode') - if isinstance(name, unicode): - name = name.encode('utf-8') - _acquireLock() - try: - if name in self.loggerDict: - rv = self.loggerDict[name] - if isinstance(rv, PlaceHolder): - ph = rv - rv = (self.loggerClass or _loggerClass)(name) - rv.manager = self - self.loggerDict[name] = rv - self._fixupChildren(ph, rv) - self._fixupParents(rv) - else: - rv = (self.loggerClass or _loggerClass)(name) - rv.manager = self - self.loggerDict[name] = rv - self._fixupParents(rv) - finally: - _releaseLock() - return rv - - def setLoggerClass(self, klass): - """ - Set the class to be used when instantiating a logger with this Manager. - """ - if klass != Logger: - if not issubclass(klass, Logger): - raise TypeError("logger not derived from logging.Logger: " - + klass.__name__) - self.loggerClass = klass - - def _fixupParents(self, alogger): - """ - Ensure that there are either loggers or placeholders all the way - from the specified logger to the root of the logger hierarchy. 
- """ - name = alogger.name - i = name.rfind(".") - rv = None - while (i > 0) and not rv: - substr = name[:i] - if substr not in self.loggerDict: - self.loggerDict[substr] = PlaceHolder(alogger) - else: - obj = self.loggerDict[substr] - if isinstance(obj, Logger): - rv = obj - else: - assert isinstance(obj, PlaceHolder) - obj.append(alogger) - i = name.rfind(".", 0, i - 1) - if not rv: - rv = self.root - alogger.parent = rv - - def _fixupChildren(self, ph, alogger): - """ - Ensure that children of the placeholder ph are connected to the - specified logger. - """ - name = alogger.name - namelen = len(name) - for c in ph.loggerMap.keys(): - #The if means ... if not c.parent.name.startswith(nm) - if c.parent.name[:namelen] != name: - alogger.parent = c.parent - c.parent = alogger - -#--------------------------------------------------------------------------- -# Logger classes and functions -#--------------------------------------------------------------------------- - -class Logger(Filterer): - """ - Instances of the Logger class represent a single logging channel. A - "logging channel" indicates an area of an application. Exactly how an - "area" is defined is up to the application developer. Since an - application can have any number of areas, logging channels are identified - by a unique string. Application areas can be nested (e.g. an area - of "input processing" might include sub-areas "read CSV files", "read - XLS files" and "read Gnumeric files"). To cater for this natural nesting, - channel names are organized into a namespace hierarchy where levels are - separated by periods, much like the Java or Python package namespace. So - in the instance given above, channel names might be "input" for the upper - level, and "input.csv", "input.xls" and "input.gnu" for the sub-levels. - There is no arbitrary limit to the depth of nesting. - """ - def __init__(self, name, level=NOTSET): - """ - Initialize the logger with a name and an optional level. 
- """ - Filterer.__init__(self) - self.name = name - self.level = _checkLevel(level) - self.parent = None - self.propagate = 1 - self.handlers = [] - self.disabled = 0 - - def setLevel(self, level): - """ - Set the logging level of this logger. - """ - self.level = _checkLevel(level) - - def debug(self, msg, *args, **kwargs): - """ - Log 'msg % args' with severity 'DEBUG'. - - To pass exception information, use the keyword argument exc_info with - a true value, e.g. - - logger.debug("Houston, we have a %s", "thorny problem", exc_info=1) - """ - if self.isEnabledFor(DEBUG): - self._log(DEBUG, msg, args, **kwargs) - - def info(self, msg, *args, **kwargs): - """ - Log 'msg % args' with severity 'INFO'. - - To pass exception information, use the keyword argument exc_info with - a true value, e.g. - - logger.info("Houston, we have a %s", "interesting problem", exc_info=1) - """ - if self.isEnabledFor(INFO): - self._log(INFO, msg, args, **kwargs) - - def warning(self, msg, *args, **kwargs): - """ - Log 'msg % args' with severity 'WARNING'. - - To pass exception information, use the keyword argument exc_info with - a true value, e.g. - - logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1) - """ - if self.isEnabledFor(WARNING): - self._log(WARNING, msg, args, **kwargs) - - warn = warning - - def error(self, msg, *args, **kwargs): - """ - Log 'msg % args' with severity 'ERROR'. - - To pass exception information, use the keyword argument exc_info with - a true value, e.g. - - logger.error("Houston, we have a %s", "major problem", exc_info=1) - """ - if self.isEnabledFor(ERROR): - self._log(ERROR, msg, args, **kwargs) - - def exception(self, msg, *args, **kwargs): - """ - Convenience method for logging an ERROR with exception information. - """ - kwargs['exc_info'] = 1 - self.error(msg, *args, **kwargs) - - def critical(self, msg, *args, **kwargs): - """ - Log 'msg % args' with severity 'CRITICAL'. 
- - To pass exception information, use the keyword argument exc_info with - a true value, e.g. - - logger.critical("Houston, we have a %s", "major disaster", exc_info=1) - """ - if self.isEnabledFor(CRITICAL): - self._log(CRITICAL, msg, args, **kwargs) - - fatal = critical - - def log(self, level, msg, *args, **kwargs): - """ - Log 'msg % args' with the integer severity 'level'. - - To pass exception information, use the keyword argument exc_info with - a true value, e.g. - - logger.log(level, "We have a %s", "mysterious problem", exc_info=1) - """ - if not isinstance(level, int): - if raiseExceptions: - raise TypeError("level must be an integer") - else: - return - if self.isEnabledFor(level): - self._log(level, msg, args, **kwargs) - - def findCaller(self): - """ - Find the stack frame of the caller so that we can note the source - file name, line number and function name. - """ - f = currentframe() - #On some versions of IronPython, currentframe() returns None if - #IronPython isn't run with -X:Frames. - if f is not None: - f = f.f_back - rv = "(unknown file)", 0, "(unknown function)" - while hasattr(f, "f_code"): - co = f.f_code - filename = os.path.normcase(co.co_filename) - if filename == _srcfile: - f = f.f_back - continue - rv = (co.co_filename, f.f_lineno, co.co_name) - break - return rv - - def makeRecord(self, name, level, fn, lno, msg, args, exc_info, func=None, extra=None): - """ - A factory method which can be overridden in subclasses to create - specialized LogRecords. - """ - rv = LogRecord(name, level, fn, lno, msg, args, exc_info, func) - if extra is not None: - for key in extra: - if (key in ["message", "asctime"]) or (key in rv.__dict__): - raise KeyError("Attempt to overwrite %r in LogRecord" % key) - rv.__dict__[key] = extra[key] - return rv - - def _log(self, level, msg, args, exc_info=None, extra=None): - """ - Low-level logging routine which creates a LogRecord and then calls - all the handlers of this logger to handle the record. 
- """ - if _srcfile: - #IronPython doesn't track Python frames, so findCaller raises an - #exception on some versions of IronPython. We trap it here so that - #IronPython can use logging. - try: - fn, lno, func = self.findCaller() - except ValueError: - fn, lno, func = "(unknown file)", 0, "(unknown function)" - else: - fn, lno, func = "(unknown file)", 0, "(unknown function)" - if exc_info: - if not isinstance(exc_info, tuple): - exc_info = sys.exc_info() - record = self.makeRecord(self.name, level, fn, lno, msg, args, exc_info, func, extra) - self.handle(record) - - def handle(self, record): - """ - Call the handlers for the specified record. - - This method is used for unpickled records received from a socket, as - well as those created locally. Logger-level filtering is applied. - """ - if (not self.disabled) and self.filter(record): - self.callHandlers(record) - - def addHandler(self, hdlr): - """ - Add the specified handler to this logger. - """ - _acquireLock() - try: - if not (hdlr in self.handlers): - self.handlers.append(hdlr) - finally: - _releaseLock() - - def removeHandler(self, hdlr): - """ - Remove the specified handler from this logger. - """ - _acquireLock() - try: - if hdlr in self.handlers: - self.handlers.remove(hdlr) - finally: - _releaseLock() - - def callHandlers(self, record): - """ - Pass a record to all relevant handlers. - - Loop through all handlers for this logger and its parents in the - logger hierarchy. If no handler was found, output a one-off error - message to sys.stderr. Stop searching up the hierarchy whenever a - logger with the "propagate" attribute set to zero is found - that - will be the last logger whose handlers are called. 
- """ - c = self - found = 0 - while c: - for hdlr in c.handlers: - found = found + 1 - if record.levelno >= hdlr.level: - hdlr.handle(record) - if not c.propagate: - c = None #break out - else: - c = c.parent - if (found == 0) and raiseExceptions and not self.manager.emittedNoHandlerWarning: - sys.stderr.write("No handlers could be found for logger" - " \"%s\"\n" % self.name) - self.manager.emittedNoHandlerWarning = 1 - - def getEffectiveLevel(self): - """ - Get the effective level for this logger. - - Loop through this logger and its parents in the logger hierarchy, - looking for a non-zero logging level. Return the first one found. - """ - logger = self - while logger: - if logger.level: - return logger.level - logger = logger.parent - return NOTSET - - def isEnabledFor(self, level): - """ - Is this logger enabled for level 'level'? - """ - if self.manager.disable >= level: - return 0 - return level >= self.getEffectiveLevel() - - def getChild(self, suffix): - """ - Get a logger which is a descendant to this one. - - This is a convenience method, such that - - logging.getLogger('abc').getChild('def.ghi') - - is the same as - - logging.getLogger('abc.def.ghi') - - It's useful, for example, when the parent logger is named using - __name__ rather than a literal string. - """ - if self.root is not self: - suffix = '.'.join((self.name, suffix)) - return self.manager.getLogger(suffix) - -class RootLogger(Logger): - """ - A root logger is not that different to any other logger, except that - it must have a logging level and there is only one instance of it in - the hierarchy. - """ - def __init__(self, level): - """ - Initialize the logger with the name "root". - """ - Logger.__init__(self, "root", level) - -_loggerClass = Logger - -class LoggerAdapter(object): - """ - An adapter for loggers which makes it easier to specify contextual - information in logging output. 
- """ - - def __init__(self, logger, extra): - """ - Initialize the adapter with a logger and a dict-like object which - provides contextual information. This constructor signature allows - easy stacking of LoggerAdapters, if so desired. - - You can effectively pass keyword arguments as shown in the - following example: - - adapter = LoggerAdapter(someLogger, dict(p1=v1, p2="v2")) - """ - self.logger = logger - self.extra = extra - - def process(self, msg, kwargs): - """ - Process the logging message and keyword arguments passed in to - a logging call to insert contextual information. You can either - manipulate the message itself, the keyword args or both. Return - the message and kwargs modified (or not) to suit your needs. - - Normally, you'll only need to override this one method in a - LoggerAdapter subclass for your specific needs. - """ - kwargs["extra"] = self.extra - return msg, kwargs - - def debug(self, msg, *args, **kwargs): - """ - Delegate a debug call to the underlying logger, after adding - contextual information from this adapter instance. - """ - msg, kwargs = self.process(msg, kwargs) - self.logger.debug(msg, *args, **kwargs) - - def info(self, msg, *args, **kwargs): - """ - Delegate an info call to the underlying logger, after adding - contextual information from this adapter instance. - """ - msg, kwargs = self.process(msg, kwargs) - self.logger.info(msg, *args, **kwargs) - - def warning(self, msg, *args, **kwargs): - """ - Delegate a warning call to the underlying logger, after adding - contextual information from this adapter instance. - """ - msg, kwargs = self.process(msg, kwargs) - self.logger.warning(msg, *args, **kwargs) - - def error(self, msg, *args, **kwargs): - """ - Delegate an error call to the underlying logger, after adding - contextual information from this adapter instance. 
- """ - msg, kwargs = self.process(msg, kwargs) - self.logger.error(msg, *args, **kwargs) - - def exception(self, msg, *args, **kwargs): - """ - Delegate an exception call to the underlying logger, after adding - contextual information from this adapter instance. - """ - msg, kwargs = self.process(msg, kwargs) - kwargs["exc_info"] = 1 - self.logger.error(msg, *args, **kwargs) - - def critical(self, msg, *args, **kwargs): - """ - Delegate a critical call to the underlying logger, after adding - contextual information from this adapter instance. - """ - msg, kwargs = self.process(msg, kwargs) - self.logger.critical(msg, *args, **kwargs) - - def log(self, level, msg, *args, **kwargs): - """ - Delegate a log call to the underlying logger, after adding - contextual information from this adapter instance. - """ - msg, kwargs = self.process(msg, kwargs) - self.logger.log(level, msg, *args, **kwargs) - - def isEnabledFor(self, level): - """ - See if the underlying logger is enabled for the specified level. - """ - return self.logger.isEnabledFor(level) - -root = RootLogger(WARNING) -Logger.root = root -Logger.manager = Manager(Logger.root) - -#--------------------------------------------------------------------------- -# Configuration classes and functions -#--------------------------------------------------------------------------- - -BASIC_FORMAT = "%(levelname)s:%(name)s:%(message)s" - -def basicConfig(**kwargs): - """ - Do basic configuration for the logging system. - - This function does nothing if the root logger already has handlers - configured. It is a convenience method intended for use by simple scripts - to do one-shot configuration of the logging package. - - The default behaviour is to create a StreamHandler which writes to - sys.stderr, set a formatter using the BASIC_FORMAT format string, and - add the handler to the root logger. - - A number of optional keyword arguments may be specified, which can alter - the default behaviour. 
- - filename Specifies that a FileHandler be created, using the specified - filename, rather than a StreamHandler. - filemode Specifies the mode to open the file, if filename is specified - (if filemode is unspecified, it defaults to 'a'). - format Use the specified format string for the handler. - datefmt Use the specified date/time format. - level Set the root logger level to the specified level. - stream Use the specified stream to initialize the StreamHandler. Note - that this argument is incompatible with 'filename' - if both - are present, 'stream' is ignored. - - Note that you could specify a stream created using open(filename, mode) - rather than passing the filename and mode in. However, it should be - remembered that StreamHandler does not close its stream (since it may be - using sys.stdout or sys.stderr), whereas FileHandler closes its stream - when the handler is closed. - """ - # Add thread safety in case someone mistakenly calls - # basicConfig() from multiple threads - _acquireLock() - try: - if len(root.handlers) == 0: - filename = kwargs.get("filename") - if filename: - mode = kwargs.get("filemode", 'a') - hdlr = FileHandler(filename, mode) - else: - stream = kwargs.get("stream") - hdlr = StreamHandler(stream) - fs = kwargs.get("format", BASIC_FORMAT) - dfs = kwargs.get("datefmt", None) - fmt = Formatter(fs, dfs) - hdlr.setFormatter(fmt) - root.addHandler(hdlr) - level = kwargs.get("level") - if level is not None: - root.setLevel(level) - finally: - _releaseLock() - -#--------------------------------------------------------------------------- -# Utility functions at module level. -# Basically delegate everything to the root logger. -#--------------------------------------------------------------------------- - -def getLogger(name=None): - """ - Return a logger with the specified name, creating it if necessary. - - If no name is specified, return the root logger. 
- """ - if name: - return Logger.manager.getLogger(name) - else: - return root - -#def getRootLogger(): -# """ -# Return the root logger. -# -# Note that getLogger('') now does the same thing, so this function is -# deprecated and may disappear in the future. -# """ -# return root - -def critical(msg, *args, **kwargs): - """ - Log a message with severity 'CRITICAL' on the root logger. - """ - if len(root.handlers) == 0: - basicConfig() - root.critical(msg, *args, **kwargs) - -fatal = critical - -def error(msg, *args, **kwargs): - """ - Log a message with severity 'ERROR' on the root logger. - """ - if len(root.handlers) == 0: - basicConfig() - root.error(msg, *args, **kwargs) - -def exception(msg, *args, **kwargs): - """ - Log a message with severity 'ERROR' on the root logger, - with exception information. - """ - kwargs['exc_info'] = 1 - error(msg, *args, **kwargs) - -def warning(msg, *args, **kwargs): - """ - Log a message with severity 'WARNING' on the root logger. - """ - if len(root.handlers) == 0: - basicConfig() - root.warning(msg, *args, **kwargs) - -warn = warning - -def info(msg, *args, **kwargs): - """ - Log a message with severity 'INFO' on the root logger. - """ - if len(root.handlers) == 0: - basicConfig() - root.info(msg, *args, **kwargs) - -def debug(msg, *args, **kwargs): - """ - Log a message with severity 'DEBUG' on the root logger. - """ - if len(root.handlers) == 0: - basicConfig() - root.debug(msg, *args, **kwargs) - -def log(level, msg, *args, **kwargs): - """ - Log 'msg % args' with the integer severity 'level' on the root logger. - """ - if len(root.handlers) == 0: - basicConfig() - root.log(level, msg, *args, **kwargs) - -def disable(level): - """ - Disable all logging calls of severity 'level' and below. - """ - root.manager.disable = level - -def shutdown(handlerList=_handlerList): - """ - Perform any cleanup actions in the logging system (e.g. flushing - buffers). - - Should be called at application exit. 
- """ - for wr in reversed(handlerList[:]): - #errors might occur, for example, if files are locked - #we just ignore them if raiseExceptions is not set - try: - h = wr() - if h: - try: - h.acquire() - h.flush() - h.close() - except (IOError, ValueError): - # Ignore errors which might be caused - # because handlers have been closed but - # references to them are still around at - # application exit. - pass - finally: - h.release() - except: - if raiseExceptions: - raise - #else, swallow - -#Let's try and shutdown automatically on application exit... -import atexit -atexit.register(shutdown) - -# Null handler - -class NullHandler(Handler): - """ - This handler does nothing. It's intended to be used to avoid the - "No handlers could be found for logger XXX" one-off warning. This is - important for library code, which may contain code to log events. If a user - of the library does not configure logging, the one-off warning might be - produced; to avoid this, the library developer simply needs to instantiate - a NullHandler and add it to the top-level logger of the library module or - package. - """ - def handle(self, record): - pass - - def emit(self, record): - pass - - def createLock(self): - self.lock = None - -# Warnings integration - -_warnings_showwarning = None - -def _showwarning(message, category, filename, lineno, file=None, line=None): - """ - Implementation of showwarnings which redirects to logging, which will first - check to see if the file parameter is None. If a file is specified, it will - delegate to the original warnings implementation of showwarning. Otherwise, - it will call warnings.formatwarning and will log the resulting string to a - warnings logger named "py.warnings" with level logging.WARNING. 
- """ - if file is not None: - if _warnings_showwarning is not None: - _warnings_showwarning(message, category, filename, lineno, file, line) - else: - s = warnings.formatwarning(message, category, filename, lineno, line) - logger = getLogger("py.warnings") - if not logger.handlers: - logger.addHandler(NullHandler()) - logger.warning("%s", s) - -def captureWarnings(capture): - """ - If capture is true, redirect all warnings to the logging package. - If capture is False, ensure that warnings are not redirected to logging - but to their original destinations. - """ - global _warnings_showwarning - if capture: - if _warnings_showwarning is None: - _warnings_showwarning = warnings.showwarning - warnings.showwarning = _showwarning - else: - if _warnings_showwarning is not None: - warnings.showwarning = _warnings_showwarning - _warnings_showwarning = None diff --git a/python/Lib/logging/config.py b/python/Lib/logging/config.py deleted file mode 100755 index 8b3795675d..0000000000 --- a/python/Lib/logging/config.py +++ /dev/null @@ -1,919 +0,0 @@ -# Copyright 2001-2014 by Vinay Sajip. All Rights Reserved. -# -# Permission to use, copy, modify, and distribute this software and its -# documentation for any purpose and without fee is hereby granted, -# provided that the above copyright notice appear in all copies and that -# both that copyright notice and this permission notice appear in -# supporting documentation, and that the name of Vinay Sajip -# not be used in advertising or publicity pertaining to distribution -# of the software without specific, written prior permission. -# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL -# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR -# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -""" -Configuration functions for the logging package for Python. The core package -is based on PEP 282 and comments thereto in comp.lang.python, and influenced -by Apache's log4j system. - -Copyright (C) 2001-2014 Vinay Sajip. All Rights Reserved. - -To use, simply 'import logging' and log away! -""" - -import cStringIO -import errno -import io -import logging -import logging.handlers -import os -import re -import socket -import struct -import sys -import traceback -import types - -try: - import thread - import threading -except ImportError: - thread = None - -from SocketServer import ThreadingTCPServer, StreamRequestHandler - - -DEFAULT_LOGGING_CONFIG_PORT = 9030 - -RESET_ERROR = errno.ECONNRESET - -# -# The following code implements a socket listener for on-the-fly -# reconfiguration of logging. -# -# _listener holds the server object doing the listening -_listener = None - -def fileConfig(fname, defaults=None, disable_existing_loggers=True): - """ - Read the logging configuration from a ConfigParser-format file. - - This can be called several times from an application, allowing an end user - the ability to select from various pre-canned configurations (if the - developer provides a mechanism to present the choices and load the chosen - configuration). 
- """ - import ConfigParser - - cp = ConfigParser.ConfigParser(defaults) - if hasattr(fname, 'readline'): - cp.readfp(fname) - else: - cp.read(fname) - - formatters = _create_formatters(cp) - - # critical section - logging._acquireLock() - try: - logging._handlers.clear() - del logging._handlerList[:] - # Handlers add themselves to logging._handlers - handlers = _install_handlers(cp, formatters) - _install_loggers(cp, handlers, disable_existing_loggers) - finally: - logging._releaseLock() - - -def _resolve(name): - """Resolve a dotted name to a global object.""" - name = name.split('.') - used = name.pop(0) - found = __import__(used) - for n in name: - used = used + '.' + n - try: - found = getattr(found, n) - except AttributeError: - __import__(used) - found = getattr(found, n) - return found - -def _strip_spaces(alist): - return map(lambda x: x.strip(), alist) - -def _encoded(s): - return s if isinstance(s, str) else s.encode('utf-8') - -def _create_formatters(cp): - """Create and return formatters""" - flist = cp.get("formatters", "keys") - if not len(flist): - return {} - flist = flist.split(",") - flist = _strip_spaces(flist) - formatters = {} - for form in flist: - sectname = "formatter_%s" % form - opts = cp.options(sectname) - if "format" in opts: - fs = cp.get(sectname, "format", 1) - else: - fs = None - if "datefmt" in opts: - dfs = cp.get(sectname, "datefmt", 1) - else: - dfs = None - c = logging.Formatter - if "class" in opts: - class_name = cp.get(sectname, "class") - if class_name: - c = _resolve(class_name) - f = c(fs, dfs) - formatters[form] = f - return formatters - - -def _install_handlers(cp, formatters): - """Install and return handlers""" - hlist = cp.get("handlers", "keys") - if not len(hlist): - return {} - hlist = hlist.split(",") - hlist = _strip_spaces(hlist) - handlers = {} - fixups = [] #for inter-handler references - for hand in hlist: - sectname = "handler_%s" % hand - klass = cp.get(sectname, "class") - opts = cp.options(sectname) - 
if "formatter" in opts: - fmt = cp.get(sectname, "formatter") - else: - fmt = "" - try: - klass = eval(klass, vars(logging)) - except (AttributeError, NameError): - klass = _resolve(klass) - args = cp.get(sectname, "args") - args = eval(args, vars(logging)) - h = klass(*args) - if "level" in opts: - level = cp.get(sectname, "level") - h.setLevel(logging._levelNames[level]) - if len(fmt): - h.setFormatter(formatters[fmt]) - if issubclass(klass, logging.handlers.MemoryHandler): - if "target" in opts: - target = cp.get(sectname,"target") - else: - target = "" - if len(target): #the target handler may not be loaded yet, so keep for later... - fixups.append((h, target)) - handlers[hand] = h - #now all handlers are loaded, fixup inter-handler references... - for h, t in fixups: - h.setTarget(handlers[t]) - return handlers - - -def _install_loggers(cp, handlers, disable_existing_loggers): - """Create and install loggers""" - - # configure the root first - llist = cp.get("loggers", "keys") - llist = llist.split(",") - llist = list(map(lambda x: x.strip(), llist)) - llist.remove("root") - sectname = "logger_root" - root = logging.root - log = root - opts = cp.options(sectname) - if "level" in opts: - level = cp.get(sectname, "level") - log.setLevel(logging._levelNames[level]) - for h in root.handlers[:]: - root.removeHandler(h) - hlist = cp.get(sectname, "handlers") - if len(hlist): - hlist = hlist.split(",") - hlist = _strip_spaces(hlist) - for hand in hlist: - log.addHandler(handlers[hand]) - - #and now the others... - #we don't want to lose the existing loggers, - #since other threads may have pointers to them. - #existing is set to contain all existing loggers, - #and as we go through the new configuration we - #remove any which are configured. At the end, - #what's left in existing is the set of loggers - #which were in the previous configuration but - #which are not in the new configuration. 
- existing = list(root.manager.loggerDict.keys()) - #The list needs to be sorted so that we can - #avoid disabling child loggers of explicitly - #named loggers. With a sorted list it is easier - #to find the child loggers. - existing.sort() - #We'll keep the list of existing loggers - #which are children of named loggers here... - child_loggers = [] - #now set up the new ones... - for log in llist: - sectname = "logger_%s" % log - qn = cp.get(sectname, "qualname") - opts = cp.options(sectname) - if "propagate" in opts: - propagate = cp.getint(sectname, "propagate") - else: - propagate = 1 - logger = logging.getLogger(qn) - if qn in existing: - i = existing.index(qn) + 1 # start with the entry after qn - prefixed = qn + "." - pflen = len(prefixed) - num_existing = len(existing) - while i < num_existing: - if existing[i][:pflen] == prefixed: - child_loggers.append(existing[i]) - i += 1 - existing.remove(qn) - if "level" in opts: - level = cp.get(sectname, "level") - logger.setLevel(logging._levelNames[level]) - for h in logger.handlers[:]: - logger.removeHandler(h) - logger.propagate = propagate - logger.disabled = 0 - hlist = cp.get(sectname, "handlers") - if len(hlist): - hlist = hlist.split(",") - hlist = _strip_spaces(hlist) - for hand in hlist: - logger.addHandler(handlers[hand]) - - #Disable any old loggers. There's no point deleting - #them as other threads may continue to hold references - #and by disabling them, you stop them doing any logging. - #However, don't disable children of named loggers, as that's - #probably not what was intended by the user. 
- for log in existing: - logger = root.manager.loggerDict[log] - if log in child_loggers: - logger.level = logging.NOTSET - logger.handlers = [] - logger.propagate = 1 - else: - logger.disabled = disable_existing_loggers - - - -IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I) - - -def valid_ident(s): - m = IDENTIFIER.match(s) - if not m: - raise ValueError('Not a valid Python identifier: %r' % s) - return True - - -class ConvertingMixin(object): - """For ConvertingXXX's, this mixin class provides common functions""" - - def convert_with_key(self, key, value, replace=True): - result = self.configurator.convert(value) - #If the converted value is different, save for next time - if value is not result: - if replace: - self[key] = result - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): - result.parent = self - result.key = key - return result - - def convert(self, value): - result = self.configurator.convert(value) - if value is not result: - if type(result) in (ConvertingDict, ConvertingList, - ConvertingTuple): - result.parent = self - return result - - -# The ConvertingXXX classes are wrappers around standard Python containers, -# and they serve to convert any suitable values in the container. The -# conversion converts base dicts, lists and tuples to their wrapped -# equivalents, whereas strings which match a conversion format are converted -# appropriately. -# -# Each wrapper should have a configurator attribute holding the actual -# configurator to use for conversion. 
- -class ConvertingDict(dict, ConvertingMixin): - """A converting dictionary wrapper.""" - - def __getitem__(self, key): - value = dict.__getitem__(self, key) - return self.convert_with_key(key, value) - - def get(self, key, default=None): - value = dict.get(self, key, default) - return self.convert_with_key(key, value) - - def pop(self, key, default=None): - value = dict.pop(self, key, default) - return self.convert_with_key(key, value, replace=False) - -class ConvertingList(list, ConvertingMixin): - """A converting list wrapper.""" - def __getitem__(self, key): - value = list.__getitem__(self, key) - return self.convert_with_key(key, value) - - def pop(self, idx=-1): - value = list.pop(self, idx) - return self.convert(value) - -class ConvertingTuple(tuple, ConvertingMixin): - """A converting tuple wrapper.""" - def __getitem__(self, key): - value = tuple.__getitem__(self, key) - # Can't replace a tuple entry. - return self.convert_with_key(key, value, replace=False) - -class BaseConfigurator(object): - """ - The configurator base class which defines some useful defaults. - """ - - CONVERT_PATTERN = re.compile(r'^(?P[a-z]+)://(?P.*)$') - - WORD_PATTERN = re.compile(r'^\s*(\w+)\s*') - DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*') - INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*') - DIGIT_PATTERN = re.compile(r'^\d+$') - - value_converters = { - 'ext' : 'ext_convert', - 'cfg' : 'cfg_convert', - } - - # We might want to use a different one, e.g. importlib - importer = __import__ - - def __init__(self, config): - self.config = ConvertingDict(config) - self.config.configurator = self - # Issue 12718: winpdb replaces __import__ with a Python function, which - # ends up being treated as a bound method. 
To avoid problems, we - # set the importer on the instance, but leave it defined in the class - # so existing code doesn't break - if type(__import__) == types.FunctionType: - self.importer = __import__ - - def resolve(self, s): - """ - Resolve strings to objects using standard import and attribute - syntax. - """ - name = s.split('.') - used = name.pop(0) - try: - found = self.importer(used) - for frag in name: - used += '.' + frag - try: - found = getattr(found, frag) - except AttributeError: - self.importer(used) - found = getattr(found, frag) - return found - except ImportError: - e, tb = sys.exc_info()[1:] - v = ValueError('Cannot resolve %r: %s' % (s, e)) - v.__cause__, v.__traceback__ = e, tb - raise v - - def ext_convert(self, value): - """Default converter for the ext:// protocol.""" - return self.resolve(value) - - def cfg_convert(self, value): - """Default converter for the cfg:// protocol.""" - rest = value - m = self.WORD_PATTERN.match(rest) - if m is None: - raise ValueError("Unable to convert %r" % value) - else: - rest = rest[m.end():] - d = self.config[m.groups()[0]] - #print d, rest - while rest: - m = self.DOT_PATTERN.match(rest) - if m: - d = d[m.groups()[0]] - else: - m = self.INDEX_PATTERN.match(rest) - if m: - idx = m.groups()[0] - if not self.DIGIT_PATTERN.match(idx): - d = d[idx] - else: - try: - n = int(idx) # try as number first (most likely) - d = d[n] - except TypeError: - d = d[idx] - if m: - rest = rest[m.end():] - else: - raise ValueError('Unable to convert ' - '%r at %r' % (value, rest)) - #rest should be empty - return d - - def convert(self, value): - """ - Convert values to an appropriate type. dicts, lists and tuples are - replaced by their converting alternatives. Strings are checked to - see if they have a conversion format and are converted if they do. 
- """ - if not isinstance(value, ConvertingDict) and isinstance(value, dict): - value = ConvertingDict(value) - value.configurator = self - elif not isinstance(value, ConvertingList) and isinstance(value, list): - value = ConvertingList(value) - value.configurator = self - elif not isinstance(value, ConvertingTuple) and\ - isinstance(value, tuple): - value = ConvertingTuple(value) - value.configurator = self - elif isinstance(value, basestring): # str for py3k - m = self.CONVERT_PATTERN.match(value) - if m: - d = m.groupdict() - prefix = d['prefix'] - converter = self.value_converters.get(prefix, None) - if converter: - suffix = d['suffix'] - converter = getattr(self, converter) - value = converter(suffix) - return value - - def configure_custom(self, config): - """Configure an object with a user-supplied factory.""" - c = config.pop('()') - if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType: - c = self.resolve(c) - props = config.pop('.', None) - # Check for valid identifiers - kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) - result = c(**kwargs) - if props: - for name, value in props.items(): - setattr(result, name, value) - return result - - def as_tuple(self, value): - """Utility function which converts lists to tuples.""" - if isinstance(value, list): - value = tuple(value) - return value - -class DictConfigurator(BaseConfigurator): - """ - Configure logging using a dictionary-like object to describe the - configuration. 
- """ - - def configure(self): - """Do the configuration.""" - - config = self.config - if 'version' not in config: - raise ValueError("dictionary doesn't specify a version") - if config['version'] != 1: - raise ValueError("Unsupported version: %s" % config['version']) - incremental = config.pop('incremental', False) - EMPTY_DICT = {} - logging._acquireLock() - try: - if incremental: - handlers = config.get('handlers', EMPTY_DICT) - for name in handlers: - if name not in logging._handlers: - raise ValueError('No handler found with ' - 'name %r' % name) - else: - try: - handler = logging._handlers[name] - handler_config = handlers[name] - level = handler_config.get('level', None) - if level: - handler.setLevel(logging._checkLevel(level)) - except StandardError as e: - raise ValueError('Unable to configure handler ' - '%r: %s' % (name, e)) - loggers = config.get('loggers', EMPTY_DICT) - for name in loggers: - try: - self.configure_logger(name, loggers[name], True) - except StandardError as e: - raise ValueError('Unable to configure logger ' - '%r: %s' % (name, e)) - root = config.get('root', None) - if root: - try: - self.configure_root(root, True) - except StandardError as e: - raise ValueError('Unable to configure root ' - 'logger: %s' % e) - else: - disable_existing = config.pop('disable_existing_loggers', True) - - logging._handlers.clear() - del logging._handlerList[:] - - # Do formatters first - they don't refer to anything else - formatters = config.get('formatters', EMPTY_DICT) - for name in formatters: - try: - formatters[name] = self.configure_formatter( - formatters[name]) - except StandardError as e: - raise ValueError('Unable to configure ' - 'formatter %r: %s' % (name, e)) - # Next, do filters - they don't refer to anything else, either - filters = config.get('filters', EMPTY_DICT) - for name in filters: - try: - filters[name] = self.configure_filter(filters[name]) - except StandardError as e: - raise ValueError('Unable to configure ' - 'filter %r: %s' 
% (name, e)) - - # Next, do handlers - they refer to formatters and filters - # As handlers can refer to other handlers, sort the keys - # to allow a deterministic order of configuration - handlers = config.get('handlers', EMPTY_DICT) - deferred = [] - for name in sorted(handlers): - try: - handler = self.configure_handler(handlers[name]) - handler.name = name - handlers[name] = handler - except StandardError as e: - if 'target not configured yet' in str(e): - deferred.append(name) - else: - raise ValueError('Unable to configure handler ' - '%r: %s' % (name, e)) - - # Now do any that were deferred - for name in deferred: - try: - handler = self.configure_handler(handlers[name]) - handler.name = name - handlers[name] = handler - except StandardError as e: - raise ValueError('Unable to configure handler ' - '%r: %s' % (name, e)) - - # Next, do loggers - they refer to handlers and filters - - #we don't want to lose the existing loggers, - #since other threads may have pointers to them. - #existing is set to contain all existing loggers, - #and as we go through the new configuration we - #remove any which are configured. At the end, - #what's left in existing is the set of loggers - #which were in the previous configuration but - #which are not in the new configuration. - root = logging.root - existing = root.manager.loggerDict.keys() - #The list needs to be sorted so that we can - #avoid disabling child loggers of explicitly - #named loggers. With a sorted list it is easier - #to find the child loggers. - existing.sort() - #We'll keep the list of existing loggers - #which are children of named loggers here... - child_loggers = [] - #now set up the new ones... - loggers = config.get('loggers', EMPTY_DICT) - for name in loggers: - name = _encoded(name) - if name in existing: - i = existing.index(name) - prefixed = name + "." 
- pflen = len(prefixed) - num_existing = len(existing) - i = i + 1 # look at the entry after name - while (i < num_existing) and\ - (existing[i][:pflen] == prefixed): - child_loggers.append(existing[i]) - i = i + 1 - existing.remove(name) - try: - self.configure_logger(name, loggers[name]) - except StandardError as e: - raise ValueError('Unable to configure logger ' - '%r: %s' % (name, e)) - - #Disable any old loggers. There's no point deleting - #them as other threads may continue to hold references - #and by disabling them, you stop them doing any logging. - #However, don't disable children of named loggers, as that's - #probably not what was intended by the user. - for log in existing: - logger = root.manager.loggerDict[log] - if log in child_loggers: - logger.level = logging.NOTSET - logger.handlers = [] - logger.propagate = True - elif disable_existing: - logger.disabled = True - - # And finally, do the root logger - root = config.get('root', None) - if root: - try: - self.configure_root(root) - except StandardError as e: - raise ValueError('Unable to configure root ' - 'logger: %s' % e) - finally: - logging._releaseLock() - - def configure_formatter(self, config): - """Configure a formatter from a dictionary.""" - if '()' in config: - factory = config['()'] # for use in exception handler - try: - result = self.configure_custom(config) - except TypeError as te: - if "'format'" not in str(te): - raise - #Name of parameter changed from fmt to format. - #Retry with old name. - #This is so that code can be used with older Python versions - #(e.g. 
by Django) - config['fmt'] = config.pop('format') - config['()'] = factory - result = self.configure_custom(config) - else: - fmt = config.get('format', None) - dfmt = config.get('datefmt', None) - result = logging.Formatter(fmt, dfmt) - return result - - def configure_filter(self, config): - """Configure a filter from a dictionary.""" - if '()' in config: - result = self.configure_custom(config) - else: - name = config.get('name', '') - result = logging.Filter(name) - return result - - def add_filters(self, filterer, filters): - """Add filters to a filterer from a list of names.""" - for f in filters: - try: - filterer.addFilter(self.config['filters'][f]) - except StandardError as e: - raise ValueError('Unable to add filter %r: %s' % (f, e)) - - def configure_handler(self, config): - """Configure a handler from a dictionary.""" - formatter = config.pop('formatter', None) - if formatter: - try: - formatter = self.config['formatters'][formatter] - except StandardError as e: - raise ValueError('Unable to set formatter ' - '%r: %s' % (formatter, e)) - level = config.pop('level', None) - filters = config.pop('filters', None) - if '()' in config: - c = config.pop('()') - if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType: - c = self.resolve(c) - factory = c - else: - cname = config.pop('class') - klass = self.resolve(cname) - #Special case for handler which refers to another handler - if issubclass(klass, logging.handlers.MemoryHandler) and\ - 'target' in config: - try: - th = self.config['handlers'][config['target']] - if not isinstance(th, logging.Handler): - config['class'] = cname # restore for deferred configuration - raise StandardError('target not configured yet') - config['target'] = th - except StandardError as e: - raise ValueError('Unable to set target handler ' - '%r: %s' % (config['target'], e)) - elif issubclass(klass, logging.handlers.SMTPHandler) and\ - 'mailhost' in config: - config['mailhost'] = 
self.as_tuple(config['mailhost']) - elif issubclass(klass, logging.handlers.SysLogHandler) and\ - 'address' in config: - config['address'] = self.as_tuple(config['address']) - factory = klass - kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) - try: - result = factory(**kwargs) - except TypeError as te: - if "'stream'" not in str(te): - raise - #The argument name changed from strm to stream - #Retry with old name. - #This is so that code can be used with older Python versions - #(e.g. by Django) - kwargs['strm'] = kwargs.pop('stream') - result = factory(**kwargs) - if formatter: - result.setFormatter(formatter) - if level is not None: - result.setLevel(logging._checkLevel(level)) - if filters: - self.add_filters(result, filters) - return result - - def add_handlers(self, logger, handlers): - """Add handlers to a logger from a list of names.""" - for h in handlers: - try: - logger.addHandler(self.config['handlers'][h]) - except StandardError as e: - raise ValueError('Unable to add handler %r: %s' % (h, e)) - - def common_logger_config(self, logger, config, incremental=False): - """ - Perform configuration which is common to root and non-root loggers. 
- """ - level = config.get('level', None) - if level is not None: - logger.setLevel(logging._checkLevel(level)) - if not incremental: - #Remove any existing handlers - for h in logger.handlers[:]: - logger.removeHandler(h) - handlers = config.get('handlers', None) - if handlers: - self.add_handlers(logger, handlers) - filters = config.get('filters', None) - if filters: - self.add_filters(logger, filters) - - def configure_logger(self, name, config, incremental=False): - """Configure a non-root logger from a dictionary.""" - logger = logging.getLogger(name) - self.common_logger_config(logger, config, incremental) - propagate = config.get('propagate', None) - if propagate is not None: - logger.propagate = propagate - - def configure_root(self, config, incremental=False): - """Configure a root logger from a dictionary.""" - root = logging.getLogger() - self.common_logger_config(root, config, incremental) - -dictConfigClass = DictConfigurator - -def dictConfig(config): - """Configure logging using a dictionary.""" - dictConfigClass(config).configure() - - -def listen(port=DEFAULT_LOGGING_CONFIG_PORT): - """ - Start up a socket server on the specified port, and listen for new - configurations. - - These will be sent as a file suitable for processing by fileConfig(). - Returns a Thread object on which you can call start() to start the server, - and which you can join() when appropriate. To stop the server, call - stopListening(). - """ - if not thread: - raise NotImplementedError("listen() needs threading to work") - - class ConfigStreamHandler(StreamRequestHandler): - """ - Handler for a logging configuration request. - - It expects a completely new logging configuration and uses fileConfig - to install it. - """ - def handle(self): - """ - Handle a request. - - Each request is expected to be a 4-byte length, packed using - struct.pack(">L", n), followed by the config file. - Uses fileConfig() to do the grunt work. 
- """ - import tempfile - try: - conn = self.connection - chunk = conn.recv(4) - if len(chunk) == 4: - slen = struct.unpack(">L", chunk)[0] - chunk = self.connection.recv(slen) - while len(chunk) < slen: - chunk = chunk + conn.recv(slen - len(chunk)) - try: - import json - d =json.loads(chunk) - assert isinstance(d, dict) - dictConfig(d) - except: - #Apply new configuration. - - file = cStringIO.StringIO(chunk) - try: - fileConfig(file) - except (KeyboardInterrupt, SystemExit): - raise - except: - traceback.print_exc() - if self.server.ready: - self.server.ready.set() - except socket.error as e: - if e.errno != RESET_ERROR: - raise - - class ConfigSocketReceiver(ThreadingTCPServer): - """ - A simple TCP socket-based logging config receiver. - """ - - allow_reuse_address = 1 - - def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT, - handler=None, ready=None): - ThreadingTCPServer.__init__(self, (host, port), handler) - logging._acquireLock() - self.abort = 0 - logging._releaseLock() - self.timeout = 1 - self.ready = ready - - def serve_until_stopped(self): - import select - abort = 0 - while not abort: - rd, wr, ex = select.select([self.socket.fileno()], - [], [], - self.timeout) - if rd: - self.handle_request() - logging._acquireLock() - abort = self.abort - logging._releaseLock() - self.socket.close() - - class Server(threading.Thread): - - def __init__(self, rcvr, hdlr, port): - super(Server, self).__init__() - self.rcvr = rcvr - self.hdlr = hdlr - self.port = port - self.ready = threading.Event() - - def run(self): - server = self.rcvr(port=self.port, handler=self.hdlr, - ready=self.ready) - if self.port == 0: - self.port = server.server_address[1] - self.ready.set() - global _listener - logging._acquireLock() - _listener = server - logging._releaseLock() - server.serve_until_stopped() - - return Server(ConfigSocketReceiver, ConfigStreamHandler, port) - -def stopListening(): - """ - Stop the listening server which was created with a call to 
listen(). - """ - global _listener - logging._acquireLock() - try: - if _listener: - _listener.abort = 1 - _listener = None - finally: - logging._releaseLock() diff --git a/python/Lib/logging/handlers.py b/python/Lib/logging/handlers.py deleted file mode 100755 index e430ab7b9b..0000000000 --- a/python/Lib/logging/handlers.py +++ /dev/null @@ -1,1227 +0,0 @@ -# Copyright 2001-2013 by Vinay Sajip. All Rights Reserved. -# -# Permission to use, copy, modify, and distribute this software and its -# documentation for any purpose and without fee is hereby granted, -# provided that the above copyright notice appear in all copies and that -# both that copyright notice and this permission notice appear in -# supporting documentation, and that the name of Vinay Sajip -# not be used in advertising or publicity pertaining to distribution -# of the software without specific, written prior permission. -# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL -# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR -# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -""" -Additional handlers for the logging package for Python. The core package is -based on PEP 282 and comments thereto in comp.lang.python. - -Copyright (C) 2001-2013 Vinay Sajip. All Rights Reserved. - -To use, simply 'import logging.handlers' and log away! -""" - -import errno, logging, socket, os, cPickle, struct, time, re -from stat import ST_DEV, ST_INO, ST_MTIME - -try: - import codecs -except ImportError: - codecs = None -try: - unicode - _unicode = True -except NameError: - _unicode = False - -# -# Some constants... 
-# - -DEFAULT_TCP_LOGGING_PORT = 9020 -DEFAULT_UDP_LOGGING_PORT = 9021 -DEFAULT_HTTP_LOGGING_PORT = 9022 -DEFAULT_SOAP_LOGGING_PORT = 9023 -SYSLOG_UDP_PORT = 514 -SYSLOG_TCP_PORT = 514 - -_MIDNIGHT = 24 * 60 * 60 # number of seconds in a day - -class BaseRotatingHandler(logging.FileHandler): - """ - Base class for handlers that rotate log files at a certain point. - Not meant to be instantiated directly. Instead, use RotatingFileHandler - or TimedRotatingFileHandler. - """ - def __init__(self, filename, mode, encoding=None, delay=0): - """ - Use the specified filename for streamed logging - """ - if codecs is None: - encoding = None - logging.FileHandler.__init__(self, filename, mode, encoding, delay) - self.mode = mode - self.encoding = encoding - - def emit(self, record): - """ - Emit a record. - - Output the record to the file, catering for rollover as described - in doRollover(). - """ - try: - if self.shouldRollover(record): - self.doRollover() - logging.FileHandler.emit(self, record) - except (KeyboardInterrupt, SystemExit): - raise - except: - self.handleError(record) - -class RotatingFileHandler(BaseRotatingHandler): - """ - Handler for logging to a set of files, which switches from one file - to the next when the current file reaches a certain size. - """ - def __init__(self, filename, mode='a', maxBytes=0, backupCount=0, encoding=None, delay=0): - """ - Open the specified file and use it as the stream for logging. - - By default, the file grows indefinitely. You can specify particular - values of maxBytes and backupCount to allow the file to rollover at - a predetermined size. - - Rollover occurs whenever the current log file is nearly maxBytes in - length. If backupCount is >= 1, the system will successively create - new files with the same pathname as the base file, but with extensions - ".1", ".2" etc. appended to it. For example, with a backupCount of 5 - and a base file name of "app.log", you would get "app.log", - "app.log.1", "app.log.2", ... 
through to "app.log.5". The file being - written to is always "app.log" - when it gets filled up, it is closed - and renamed to "app.log.1", and if files "app.log.1", "app.log.2" etc. - exist, then they are renamed to "app.log.2", "app.log.3" etc. - respectively. - - If maxBytes is zero, rollover never occurs. - """ - # If rotation/rollover is wanted, it doesn't make sense to use another - # mode. If for example 'w' were specified, then if there were multiple - # runs of the calling application, the logs from previous runs would be - # lost if the 'w' is respected, because the log file would be truncated - # on each run. - if maxBytes > 0: - mode = 'a' - BaseRotatingHandler.__init__(self, filename, mode, encoding, delay) - self.maxBytes = maxBytes - self.backupCount = backupCount - - def doRollover(self): - """ - Do a rollover, as described in __init__(). - """ - if self.stream: - self.stream.close() - self.stream = None - if self.backupCount > 0: - for i in range(self.backupCount - 1, 0, -1): - sfn = "%s.%d" % (self.baseFilename, i) - dfn = "%s.%d" % (self.baseFilename, i + 1) - if os.path.exists(sfn): - #print "%s -> %s" % (sfn, dfn) - if os.path.exists(dfn): - os.remove(dfn) - os.rename(sfn, dfn) - dfn = self.baseFilename + ".1" - if os.path.exists(dfn): - os.remove(dfn) - # Issue 18940: A file may not have been created if delay is True. - if os.path.exists(self.baseFilename): - os.rename(self.baseFilename, dfn) - if not self.delay: - self.stream = self._open() - - def shouldRollover(self, record): - """ - Determine if rollover should occur. - - Basically, see if the supplied record would cause the file to exceed - the size limit we have. - """ - if self.stream is None: # delay was set... - self.stream = self._open() - if self.maxBytes > 0: # are we rolling over? 
- msg = "%s\n" % self.format(record) - self.stream.seek(0, 2) #due to non-posix-compliant Windows feature - if self.stream.tell() + len(msg) >= self.maxBytes: - return 1 - return 0 - -class TimedRotatingFileHandler(BaseRotatingHandler): - """ - Handler for logging to a file, rotating the log file at certain timed - intervals. - - If backupCount is > 0, when rollover is done, no more than backupCount - files are kept - the oldest ones are deleted. - """ - def __init__(self, filename, when='h', interval=1, backupCount=0, encoding=None, delay=False, utc=False): - BaseRotatingHandler.__init__(self, filename, 'a', encoding, delay) - self.when = when.upper() - self.backupCount = backupCount - self.utc = utc - # Calculate the real rollover interval, which is just the number of - # seconds between rollovers. Also set the filename suffix used when - # a rollover occurs. Current 'when' events supported: - # S - Seconds - # M - Minutes - # H - Hours - # D - Days - # midnight - roll over at midnight - # W{0-6} - roll over on a certain day; 0 - Monday - # - # Case of the 'when' specifier is not important; lower or upper case - # will work. 
- if self.when == 'S': - self.interval = 1 # one second - self.suffix = "%Y-%m-%d_%H-%M-%S" - self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$" - elif self.when == 'M': - self.interval = 60 # one minute - self.suffix = "%Y-%m-%d_%H-%M" - self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}$" - elif self.when == 'H': - self.interval = 60 * 60 # one hour - self.suffix = "%Y-%m-%d_%H" - self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}$" - elif self.when == 'D' or self.when == 'MIDNIGHT': - self.interval = 60 * 60 * 24 # one day - self.suffix = "%Y-%m-%d" - self.extMatch = r"^\d{4}-\d{2}-\d{2}$" - elif self.when.startswith('W'): - self.interval = 60 * 60 * 24 * 7 # one week - if len(self.when) != 2: - raise ValueError("You must specify a day for weekly rollover from 0 to 6 (0 is Monday): %s" % self.when) - if self.when[1] < '0' or self.when[1] > '6': - raise ValueError("Invalid day specified for weekly rollover: %s" % self.when) - self.dayOfWeek = int(self.when[1]) - self.suffix = "%Y-%m-%d" - self.extMatch = r"^\d{4}-\d{2}-\d{2}$" - else: - raise ValueError("Invalid rollover interval specified: %s" % self.when) - - self.extMatch = re.compile(self.extMatch) - self.interval = self.interval * interval # multiply by units requested - if os.path.exists(filename): - t = os.stat(filename)[ST_MTIME] - else: - t = int(time.time()) - self.rolloverAt = self.computeRollover(t) - - def computeRollover(self, currentTime): - """ - Work out the rollover time based on the specified time. - """ - result = currentTime + self.interval - # If we are rolling over at midnight or weekly, then the interval is already known. - # What we need to figure out is WHEN the next interval is. In other words, - # if you are rolling over at midnight, then your base interval is 1 day, - # but you want to start that one day clock at midnight, not now. So, we - # have to fudge the rolloverAt value in order to trigger the first rollover - # at the right time. 
After that, the regular interval will take care of - # the rest. Note that this code doesn't care about leap seconds. :) - if self.when == 'MIDNIGHT' or self.when.startswith('W'): - # This could be done with less code, but I wanted it to be clear - if self.utc: - t = time.gmtime(currentTime) - else: - t = time.localtime(currentTime) - currentHour = t[3] - currentMinute = t[4] - currentSecond = t[5] - # r is the number of seconds left between now and midnight - r = _MIDNIGHT - ((currentHour * 60 + currentMinute) * 60 + - currentSecond) - result = currentTime + r - # If we are rolling over on a certain day, add in the number of days until - # the next rollover, but offset by 1 since we just calculated the time - # until the next day starts. There are three cases: - # Case 1) The day to rollover is today; in this case, do nothing - # Case 2) The day to rollover is further in the interval (i.e., today is - # day 2 (Wednesday) and rollover is on day 6 (Sunday). Days to - # next rollover is simply 6 - 2 - 1, or 3. - # Case 3) The day to rollover is behind us in the interval (i.e., today - # is day 5 (Saturday) and rollover is on day 3 (Thursday). - # Days to rollover is 6 - 5 + 3, or 4. In this case, it's the - # number of days left in the current week (1) plus the number - # of days in the next week until the rollover day (3). - # The calculations described in 2) and 3) above need to have a day added. - # This is because the above time calculation takes us to midnight on this - # day, i.e. the start of the next day. 
- if self.when.startswith('W'): - day = t[6] # 0 is Monday - if day != self.dayOfWeek: - if day < self.dayOfWeek: - daysToWait = self.dayOfWeek - day - else: - daysToWait = 6 - day + self.dayOfWeek + 1 - newRolloverAt = result + (daysToWait * (60 * 60 * 24)) - if not self.utc: - dstNow = t[-1] - dstAtRollover = time.localtime(newRolloverAt)[-1] - if dstNow != dstAtRollover: - if not dstNow: # DST kicks in before next rollover, so we need to deduct an hour - addend = -3600 - else: # DST bows out before next rollover, so we need to add an hour - addend = 3600 - newRolloverAt += addend - result = newRolloverAt - return result - - def shouldRollover(self, record): - """ - Determine if rollover should occur. - - record is not used, as we are just comparing times, but it is needed so - the method signatures are the same - """ - t = int(time.time()) - if t >= self.rolloverAt: - return 1 - #print "No need to rollover: %d, %d" % (t, self.rolloverAt) - return 0 - - def getFilesToDelete(self): - """ - Determine the files to delete when rolling over. - - More specific than the earlier method, which just used glob.glob(). - """ - dirName, baseName = os.path.split(self.baseFilename) - fileNames = os.listdir(dirName) - result = [] - prefix = baseName + "." - plen = len(prefix) - for fileName in fileNames: - if fileName[:plen] == prefix: - suffix = fileName[plen:] - if self.extMatch.match(suffix): - result.append(os.path.join(dirName, fileName)) - result.sort() - if len(result) < self.backupCount: - result = [] - else: - result = result[:len(result) - self.backupCount] - return result - - def doRollover(self): - """ - do a rollover; in this case, a date/time stamp is appended to the filename - when the rollover happens. However, you want the file to be named for the - start of the interval, not the current time. If there is a backup count, - then we have to get a list of matching filenames, sort them and remove - the one with the oldest suffix. 
- """ - if self.stream: - self.stream.close() - self.stream = None - # get the time that this sequence started at and make it a TimeTuple - currentTime = int(time.time()) - dstNow = time.localtime(currentTime)[-1] - t = self.rolloverAt - self.interval - if self.utc: - timeTuple = time.gmtime(t) - else: - timeTuple = time.localtime(t) - dstThen = timeTuple[-1] - if dstNow != dstThen: - if dstNow: - addend = 3600 - else: - addend = -3600 - timeTuple = time.localtime(t + addend) - dfn = self.baseFilename + "." + time.strftime(self.suffix, timeTuple) - if os.path.exists(dfn): - os.remove(dfn) - # Issue 18940: A file may not have been created if delay is True. - if os.path.exists(self.baseFilename): - os.rename(self.baseFilename, dfn) - if self.backupCount > 0: - for s in self.getFilesToDelete(): - os.remove(s) - if not self.delay: - self.stream = self._open() - newRolloverAt = self.computeRollover(currentTime) - while newRolloverAt <= currentTime: - newRolloverAt = newRolloverAt + self.interval - #If DST changes and midnight or weekly rollover, adjust for this. - if (self.when == 'MIDNIGHT' or self.when.startswith('W')) and not self.utc: - dstAtRollover = time.localtime(newRolloverAt)[-1] - if dstNow != dstAtRollover: - if not dstNow: # DST kicks in before next rollover, so we need to deduct an hour - addend = -3600 - else: # DST bows out before next rollover, so we need to add an hour - addend = 3600 - newRolloverAt += addend - self.rolloverAt = newRolloverAt - -class WatchedFileHandler(logging.FileHandler): - """ - A handler for logging to a file, which watches the file - to see if it has changed while in use. This can happen because of - usage of programs such as newsyslog and logrotate which perform - log file rotation. This handler, intended for use under Unix, - watches the file to see if it has changed since the last emit. - (A file has changed if its device or inode have changed.) 
- If it has changed, the old file stream is closed, and the file - opened to get a new stream. - - This handler is not appropriate for use under Windows, because - under Windows open files cannot be moved or renamed - logging - opens the files with exclusive locks - and so there is no need - for such a handler. Furthermore, ST_INO is not supported under - Windows; stat always returns zero for this value. - - This handler is based on a suggestion and patch by Chad J. - Schroeder. - """ - def __init__(self, filename, mode='a', encoding=None, delay=0): - logging.FileHandler.__init__(self, filename, mode, encoding, delay) - self.dev, self.ino = -1, -1 - self._statstream() - - def _statstream(self): - if self.stream: - sres = os.fstat(self.stream.fileno()) - self.dev, self.ino = sres[ST_DEV], sres[ST_INO] - - def emit(self, record): - """ - Emit a record. - - First check if the underlying file has changed, and if it - has, close the old stream and reopen the file to get the - current stream. - """ - # Reduce the chance of race conditions by stat'ing by path only - # once and then fstat'ing our new fd if we opened a new log stream. - # See issue #14632: Thanks to John Mulligan for the problem report - # and patch. - try: - # stat the file by path, checking for existence - sres = os.stat(self.baseFilename) - except OSError as err: - if err.errno == errno.ENOENT: - sres = None - else: - raise - # compare file system stat with that of our stream file handle - if not sres or sres[ST_DEV] != self.dev or sres[ST_INO] != self.ino: - if self.stream is not None: - # we have an open file handle, clean it up - self.stream.flush() - self.stream.close() - self.stream = None # See Issue #21742: _open () might fail. 
- # open a new file handle and get new stat info from that fd - self.stream = self._open() - self._statstream() - logging.FileHandler.emit(self, record) - -class SocketHandler(logging.Handler): - """ - A handler class which writes logging records, in pickle format, to - a streaming socket. The socket is kept open across logging calls. - If the peer resets it, an attempt is made to reconnect on the next call. - The pickle which is sent is that of the LogRecord's attribute dictionary - (__dict__), so that the receiver does not need to have the logging module - installed in order to process the logging event. - - To unpickle the record at the receiving end into a LogRecord, use the - makeLogRecord function. - """ - - def __init__(self, host, port): - """ - Initializes the handler with a specific host address and port. - - The attribute 'closeOnError' is set to 1 - which means that if - a socket error occurs, the socket is silently closed and then - reopened on the next logging call. - """ - logging.Handler.__init__(self) - self.host = host - self.port = port - self.sock = None - self.closeOnError = 0 - self.retryTime = None - # - # Exponential backoff parameters. - # - self.retryStart = 1.0 - self.retryMax = 30.0 - self.retryFactor = 2.0 - - def makeSocket(self, timeout=1): - """ - A factory method which allows subclasses to define the precise - type of socket they want. - """ - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - if hasattr(s, 'settimeout'): - s.settimeout(timeout) - s.connect((self.host, self.port)) - return s - - def createSocket(self): - """ - Try to create a socket, using an exponential backoff with - a max retry time. Thanks to Robert Olson for the original patch - (SF #815911) which has been slightly refactored. - """ - now = time.time() - # Either retryTime is None, in which case this - # is the first time back after a disconnect, or - # we've waited long enough. 
- if self.retryTime is None: - attempt = 1 - else: - attempt = (now >= self.retryTime) - if attempt: - try: - self.sock = self.makeSocket() - self.retryTime = None # next time, no delay before trying - except socket.error: - #Creation failed, so set the retry time and return. - if self.retryTime is None: - self.retryPeriod = self.retryStart - else: - self.retryPeriod = self.retryPeriod * self.retryFactor - if self.retryPeriod > self.retryMax: - self.retryPeriod = self.retryMax - self.retryTime = now + self.retryPeriod - - def send(self, s): - """ - Send a pickled string to the socket. - - This function allows for partial sends which can happen when the - network is busy. - """ - if self.sock is None: - self.createSocket() - #self.sock can be None either because we haven't reached the retry - #time yet, or because we have reached the retry time and retried, - #but are still unable to connect. - if self.sock: - try: - if hasattr(self.sock, "sendall"): - self.sock.sendall(s) - else: - sentsofar = 0 - left = len(s) - while left > 0: - sent = self.sock.send(s[sentsofar:]) - sentsofar = sentsofar + sent - left = left - sent - except socket.error: - self.sock.close() - self.sock = None # so we can call createSocket next time - - def makePickle(self, record): - """ - Pickles the record in binary format with a length prefix, and - returns it ready for transmission across the socket. - """ - ei = record.exc_info - if ei: - # just to get traceback text into record.exc_text ... - dummy = self.format(record) - record.exc_info = None # to avoid Unpickleable error - # See issue #14436: If msg or args are objects, they may not be - # available on the receiving end. So we convert the msg % args - # to a string, save it as msg and zap the args. 
- d = dict(record.__dict__) - d['msg'] = record.getMessage() - d['args'] = None - s = cPickle.dumps(d, 1) - if ei: - record.exc_info = ei # for next handler - slen = struct.pack(">L", len(s)) - return slen + s - - def handleError(self, record): - """ - Handle an error during logging. - - An error has occurred during logging. Most likely cause - - connection lost. Close the socket so that we can retry on the - next event. - """ - if self.closeOnError and self.sock: - self.sock.close() - self.sock = None #try to reconnect next time - else: - logging.Handler.handleError(self, record) - - def emit(self, record): - """ - Emit a record. - - Pickles the record and writes it to the socket in binary format. - If there is an error with the socket, silently drop the packet. - If there was a problem with the socket, re-establishes the - socket. - """ - try: - s = self.makePickle(record) - self.send(s) - except (KeyboardInterrupt, SystemExit): - raise - except: - self.handleError(record) - - def close(self): - """ - Closes the socket. - """ - self.acquire() - try: - sock = self.sock - if sock: - self.sock = None - sock.close() - finally: - self.release() - logging.Handler.close(self) - -class DatagramHandler(SocketHandler): - """ - A handler class which writes logging records, in pickle format, to - a datagram socket. The pickle which is sent is that of the LogRecord's - attribute dictionary (__dict__), so that the receiver does not need to - have the logging module installed in order to process the logging event. - - To unpickle the record at the receiving end into a LogRecord, use the - makeLogRecord function. - - """ - def __init__(self, host, port): - """ - Initializes the handler with a specific host address and port. - """ - SocketHandler.__init__(self, host, port) - self.closeOnError = 0 - - def makeSocket(self): - """ - The factory method of SocketHandler is here overridden to create - a UDP socket (SOCK_DGRAM). 
- """ - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - return s - - def send(self, s): - """ - Send a pickled string to a socket. - - This function no longer allows for partial sends which can happen - when the network is busy - UDP does not guarantee delivery and - can deliver packets out of sequence. - """ - if self.sock is None: - self.createSocket() - self.sock.sendto(s, (self.host, self.port)) - -class SysLogHandler(logging.Handler): - """ - A handler class which sends formatted logging records to a syslog - server. Based on Sam Rushing's syslog module: - http://www.nightmare.com/squirl/python-ext/misc/syslog.py - Contributed by Nicolas Untz (after which minor refactoring changes - have been made). - """ - - # from : - # ====================================================================== - # priorities/facilities are encoded into a single 32-bit quantity, where - # the bottom 3 bits are the priority (0-7) and the top 28 bits are the - # facility (0-big number). Both the priorities and the facilities map - # roughly one-to-one to strings in the syslogd(8) source code. This - # mapping is included in this file. 
- # - # priorities (these are ordered) - - LOG_EMERG = 0 # system is unusable - LOG_ALERT = 1 # action must be taken immediately - LOG_CRIT = 2 # critical conditions - LOG_ERR = 3 # error conditions - LOG_WARNING = 4 # warning conditions - LOG_NOTICE = 5 # normal but significant condition - LOG_INFO = 6 # informational - LOG_DEBUG = 7 # debug-level messages - - # facility codes - LOG_KERN = 0 # kernel messages - LOG_USER = 1 # random user-level messages - LOG_MAIL = 2 # mail system - LOG_DAEMON = 3 # system daemons - LOG_AUTH = 4 # security/authorization messages - LOG_SYSLOG = 5 # messages generated internally by syslogd - LOG_LPR = 6 # line printer subsystem - LOG_NEWS = 7 # network news subsystem - LOG_UUCP = 8 # UUCP subsystem - LOG_CRON = 9 # clock daemon - LOG_AUTHPRIV = 10 # security/authorization messages (private) - LOG_FTP = 11 # FTP daemon - - # other codes through 15 reserved for system use - LOG_LOCAL0 = 16 # reserved for local use - LOG_LOCAL1 = 17 # reserved for local use - LOG_LOCAL2 = 18 # reserved for local use - LOG_LOCAL3 = 19 # reserved for local use - LOG_LOCAL4 = 20 # reserved for local use - LOG_LOCAL5 = 21 # reserved for local use - LOG_LOCAL6 = 22 # reserved for local use - LOG_LOCAL7 = 23 # reserved for local use - - priority_names = { - "alert": LOG_ALERT, - "crit": LOG_CRIT, - "critical": LOG_CRIT, - "debug": LOG_DEBUG, - "emerg": LOG_EMERG, - "err": LOG_ERR, - "error": LOG_ERR, # DEPRECATED - "info": LOG_INFO, - "notice": LOG_NOTICE, - "panic": LOG_EMERG, # DEPRECATED - "warn": LOG_WARNING, # DEPRECATED - "warning": LOG_WARNING, - } - - facility_names = { - "auth": LOG_AUTH, - "authpriv": LOG_AUTHPRIV, - "cron": LOG_CRON, - "daemon": LOG_DAEMON, - "ftp": LOG_FTP, - "kern": LOG_KERN, - "lpr": LOG_LPR, - "mail": LOG_MAIL, - "news": LOG_NEWS, - "security": LOG_AUTH, # DEPRECATED - "syslog": LOG_SYSLOG, - "user": LOG_USER, - "uucp": LOG_UUCP, - "local0": LOG_LOCAL0, - "local1": LOG_LOCAL1, - "local2": LOG_LOCAL2, - "local3": LOG_LOCAL3, - 
"local4": LOG_LOCAL4, - "local5": LOG_LOCAL5, - "local6": LOG_LOCAL6, - "local7": LOG_LOCAL7, - } - - #The map below appears to be trivially lowercasing the key. However, - #there's more to it than meets the eye - in some locales, lowercasing - #gives unexpected results. See SF #1524081: in the Turkish locale, - #"INFO".lower() != "info" - priority_map = { - "DEBUG" : "debug", - "INFO" : "info", - "WARNING" : "warning", - "ERROR" : "error", - "CRITICAL" : "critical" - } - - def __init__(self, address=('localhost', SYSLOG_UDP_PORT), - facility=LOG_USER, socktype=None): - """ - Initialize a handler. - - If address is specified as a string, a UNIX socket is used. To log to a - local syslogd, "SysLogHandler(address="/dev/log")" can be used. - If facility is not specified, LOG_USER is used. If socktype is - specified as socket.SOCK_DGRAM or socket.SOCK_STREAM, that specific - socket type will be used. For Unix sockets, you can also specify a - socktype of None, in which case socket.SOCK_DGRAM will be used, falling - back to socket.SOCK_STREAM. 
- """ - logging.Handler.__init__(self) - - self.address = address - self.facility = facility - self.socktype = socktype - - if isinstance(address, basestring): - self.unixsocket = 1 - self._connect_unixsocket(address) - else: - self.unixsocket = 0 - if socktype is None: - socktype = socket.SOCK_DGRAM - self.socket = socket.socket(socket.AF_INET, socktype) - if socktype == socket.SOCK_STREAM: - self.socket.connect(address) - self.socktype = socktype - self.formatter = None - - def _connect_unixsocket(self, address): - use_socktype = self.socktype - if use_socktype is None: - use_socktype = socket.SOCK_DGRAM - self.socket = socket.socket(socket.AF_UNIX, use_socktype) - try: - self.socket.connect(address) - # it worked, so set self.socktype to the used type - self.socktype = use_socktype - except socket.error: - self.socket.close() - if self.socktype is not None: - # user didn't specify falling back, so fail - raise - use_socktype = socket.SOCK_STREAM - self.socket = socket.socket(socket.AF_UNIX, use_socktype) - try: - self.socket.connect(address) - # it worked, so set self.socktype to the used type - self.socktype = use_socktype - except socket.error: - self.socket.close() - raise - - # curious: when talking to the unix-domain '/dev/log' socket, a - # zero-terminator seems to be required. this string is placed - # into a class variable so that it can be overridden if - # necessary. - log_format_string = '<%d>%s\000' - - def encodePriority(self, facility, priority): - """ - Encode the facility and priority. You can pass in strings or - integers - if strings are passed, the facility_names and - priority_names mapping dictionaries are used to convert them to - integers. - """ - if isinstance(facility, basestring): - facility = self.facility_names[facility] - if isinstance(priority, basestring): - priority = self.priority_names[priority] - return (facility << 3) | priority - - def close (self): - """ - Closes the socket. 
- """ - self.acquire() - try: - if self.unixsocket: - self.socket.close() - finally: - self.release() - logging.Handler.close(self) - - def mapPriority(self, levelName): - """ - Map a logging level name to a key in the priority_names map. - This is useful in two scenarios: when custom levels are being - used, and in the case where you can't do a straightforward - mapping by lowercasing the logging level name because of locale- - specific issues (see SF #1524081). - """ - return self.priority_map.get(levelName, "warning") - - def emit(self, record): - """ - Emit a record. - - The record is formatted, and then sent to the syslog server. If - exception information is present, it is NOT sent to the server. - """ - try: - msg = self.format(record) + '\000' - """ - We need to convert record level to lowercase, maybe this will - change in the future. - """ - prio = '<%d>' % self.encodePriority(self.facility, - self.mapPriority(record.levelname)) - # Message is a string. Convert to bytes as required by RFC 5424 - if type(msg) is unicode: - msg = msg.encode('utf-8') - msg = prio + msg - if self.unixsocket: - try: - self.socket.send(msg) - except socket.error: - self.socket.close() # See issue 17981 - self._connect_unixsocket(self.address) - self.socket.send(msg) - elif self.socktype == socket.SOCK_DGRAM: - self.socket.sendto(msg, self.address) - else: - self.socket.sendall(msg) - except (KeyboardInterrupt, SystemExit): - raise - except: - self.handleError(record) - -class SMTPHandler(logging.Handler): - """ - A handler class which sends an SMTP email for each logging event. - """ - def __init__(self, mailhost, fromaddr, toaddrs, subject, - credentials=None, secure=None): - """ - Initialize the handler. - - Initialize the instance with the from and to addresses and subject - line of the email. To specify a non-standard SMTP port, use the - (host, port) tuple format for the mailhost argument. 
To specify - authentication credentials, supply a (username, password) tuple - for the credentials argument. To specify the use of a secure - protocol (TLS), pass in a tuple for the secure argument. This will - only be used when authentication credentials are supplied. The tuple - will be either an empty tuple, or a single-value tuple with the name - of a keyfile, or a 2-value tuple with the names of the keyfile and - certificate file. (This tuple is passed to the `starttls` method). - """ - logging.Handler.__init__(self) - if isinstance(mailhost, (list, tuple)): - self.mailhost, self.mailport = mailhost - else: - self.mailhost, self.mailport = mailhost, None - if isinstance(credentials, (list, tuple)): - self.username, self.password = credentials - else: - self.username = None - self.fromaddr = fromaddr - if isinstance(toaddrs, basestring): - toaddrs = [toaddrs] - self.toaddrs = toaddrs - self.subject = subject - self.secure = secure - self._timeout = 5.0 - - def getSubject(self, record): - """ - Determine the subject for the email. - - If you want to specify a subject line which is record-dependent, - override this method. - """ - return self.subject - - def emit(self, record): - """ - Emit a record. - - Format the record and send it to the specified addressees. 
- """ - try: - import smtplib - from email.utils import formatdate - port = self.mailport - if not port: - port = smtplib.SMTP_PORT - smtp = smtplib.SMTP(self.mailhost, port, timeout=self._timeout) - msg = self.format(record) - msg = "From: %s\r\nTo: %s\r\nSubject: %s\r\nDate: %s\r\n\r\n%s" % ( - self.fromaddr, - ",".join(self.toaddrs), - self.getSubject(record), - formatdate(), msg) - if self.username: - if self.secure is not None: - smtp.ehlo() - smtp.starttls(*self.secure) - smtp.ehlo() - smtp.login(self.username, self.password) - smtp.sendmail(self.fromaddr, self.toaddrs, msg) - smtp.quit() - except (KeyboardInterrupt, SystemExit): - raise - except: - self.handleError(record) - -class NTEventLogHandler(logging.Handler): - """ - A handler class which sends events to the NT Event Log. Adds a - registry entry for the specified application name. If no dllname is - provided, win32service.pyd (which contains some basic message - placeholders) is used. Note that use of these placeholders will make - your event logs big, as the entire message source is held in the log. - If you want slimmer logs, you have to pass in the name of your own DLL - which contains the message definitions you want to use in the event log. 
- """ - def __init__(self, appname, dllname=None, logtype="Application"): - logging.Handler.__init__(self) - try: - import win32evtlogutil, win32evtlog - self.appname = appname - self._welu = win32evtlogutil - if not dllname: - dllname = os.path.split(self._welu.__file__) - dllname = os.path.split(dllname[0]) - dllname = os.path.join(dllname[0], r'win32service.pyd') - self.dllname = dllname - self.logtype = logtype - self._welu.AddSourceToRegistry(appname, dllname, logtype) - self.deftype = win32evtlog.EVENTLOG_ERROR_TYPE - self.typemap = { - logging.DEBUG : win32evtlog.EVENTLOG_INFORMATION_TYPE, - logging.INFO : win32evtlog.EVENTLOG_INFORMATION_TYPE, - logging.WARNING : win32evtlog.EVENTLOG_WARNING_TYPE, - logging.ERROR : win32evtlog.EVENTLOG_ERROR_TYPE, - logging.CRITICAL: win32evtlog.EVENTLOG_ERROR_TYPE, - } - except ImportError: - print("The Python Win32 extensions for NT (service, event "\ - "logging) appear not to be available.") - self._welu = None - - def getMessageID(self, record): - """ - Return the message ID for the event record. If you are using your - own messages, you could do this by having the msg passed to the - logger being an ID rather than a formatting string. Then, in here, - you could use a dictionary lookup to get the message ID. This - version returns 1, which is the base message ID in win32service.pyd. - """ - return 1 - - def getEventCategory(self, record): - """ - Return the event category for the record. - - Override this if you want to specify your own categories. This version - returns 0. - """ - return 0 - - def getEventType(self, record): - """ - Return the event type for the record. - - Override this if you want to specify your own types. This version does - a mapping using the handler's typemap attribute, which is set up in - __init__() to a dictionary which contains mappings for DEBUG, INFO, - WARNING, ERROR and CRITICAL. 
If you are using your own levels you will - either need to override this method or place a suitable dictionary in - the handler's typemap attribute. - """ - return self.typemap.get(record.levelno, self.deftype) - - def emit(self, record): - """ - Emit a record. - - Determine the message ID, event category and event type. Then - log the message in the NT event log. - """ - if self._welu: - try: - id = self.getMessageID(record) - cat = self.getEventCategory(record) - type = self.getEventType(record) - msg = self.format(record) - self._welu.ReportEvent(self.appname, id, cat, type, [msg]) - except (KeyboardInterrupt, SystemExit): - raise - except: - self.handleError(record) - - def close(self): - """ - Clean up this handler. - - You can remove the application name from the registry as a - source of event log entries. However, if you do this, you will - not be able to see the events as you intended in the Event Log - Viewer - it needs to be able to access the registry to get the - DLL name. - """ - #self._welu.RemoveSourceFromRegistry(self.appname, self.logtype) - logging.Handler.close(self) - -class HTTPHandler(logging.Handler): - """ - A class which sends records to a Web server, using either GET or - POST semantics. - """ - def __init__(self, host, url, method="GET"): - """ - Initialize the instance with the host, the request URL, and the method - ("GET" or "POST") - """ - logging.Handler.__init__(self) - method = method.upper() - if method not in ["GET", "POST"]: - raise ValueError("method must be GET or POST") - self.host = host - self.url = url - self.method = method - - def mapLogRecord(self, record): - """ - Default implementation of mapping the log record into a dict - that is sent as the CGI data. Overwrite in your class. - Contributed by Franz Glasner. - """ - return record.__dict__ - - def emit(self, record): - """ - Emit a record. 
- - Send the record to the Web server as a percent-encoded dictionary - """ - try: - import httplib, urllib - host = self.host - h = httplib.HTTP(host) - url = self.url - data = urllib.urlencode(self.mapLogRecord(record)) - if self.method == "GET": - if (url.find('?') >= 0): - sep = '&' - else: - sep = '?' - url = url + "%c%s" % (sep, data) - h.putrequest(self.method, url) - # support multiple hosts on one IP address... - # need to strip optional :port from host, if present - i = host.find(":") - if i >= 0: - host = host[:i] - h.putheader("Host", host) - if self.method == "POST": - h.putheader("Content-type", - "application/x-www-form-urlencoded") - h.putheader("Content-length", str(len(data))) - h.endheaders(data if self.method == "POST" else None) - h.getreply() #can't do anything with the result - except (KeyboardInterrupt, SystemExit): - raise - except: - self.handleError(record) - -class BufferingHandler(logging.Handler): - """ - A handler class which buffers logging records in memory. Whenever each - record is added to the buffer, a check is made to see if the buffer should - be flushed. If it should, then flush() is expected to do what's needed. - """ - def __init__(self, capacity): - """ - Initialize the handler with the buffer size. - """ - logging.Handler.__init__(self) - self.capacity = capacity - self.buffer = [] - - def shouldFlush(self, record): - """ - Should the handler flush its buffer? - - Returns true if the buffer is up to capacity. This method can be - overridden to implement custom flushing strategies. - """ - return (len(self.buffer) >= self.capacity) - - def emit(self, record): - """ - Emit a record. - - Append the record. If shouldFlush() tells us to, call flush() to process - the buffer. - """ - self.buffer.append(record) - if self.shouldFlush(record): - self.flush() - - def flush(self): - """ - Override to implement custom flushing behaviour. - - This version just zaps the buffer to empty. 
- """ - self.acquire() - try: - self.buffer = [] - finally: - self.release() - - def close(self): - """ - Close the handler. - - This version just flushes and chains to the parent class' close(). - """ - try: - self.flush() - finally: - logging.Handler.close(self) - -class MemoryHandler(BufferingHandler): - """ - A handler class which buffers logging records in memory, periodically - flushing them to a target handler. Flushing occurs whenever the buffer - is full, or when an event of a certain severity or greater is seen. - """ - def __init__(self, capacity, flushLevel=logging.ERROR, target=None): - """ - Initialize the handler with the buffer size, the level at which - flushing should occur and an optional target. - - Note that without a target being set either here or via setTarget(), - a MemoryHandler is no use to anyone! - """ - BufferingHandler.__init__(self, capacity) - self.flushLevel = flushLevel - self.target = target - - def shouldFlush(self, record): - """ - Check for buffer full or a record at the flushLevel or higher. - """ - return (len(self.buffer) >= self.capacity) or \ - (record.levelno >= self.flushLevel) - - def setTarget(self, target): - """ - Set the target handler for this handler. - """ - self.target = target - - def flush(self): - """ - For a MemoryHandler, flushing means just sending the buffered - records to the target, if there is one. Override if you want - different behaviour. - """ - self.acquire() - try: - if self.target: - for record in self.buffer: - self.target.handle(record) - self.buffer = [] - finally: - self.release() - - def close(self): - """ - Flush, set the target to None and lose the buffer. 
- """ - try: - self.flush() - finally: - self.acquire() - try: - self.target = None - BufferingHandler.close(self) - finally: - self.release() diff --git a/python/Lib/macpath.py b/python/Lib/macpath.py deleted file mode 100755 index 9ebd83cf90..0000000000 --- a/python/Lib/macpath.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Pathname and path-related operations for the Macintosh.""" - -import os -import warnings -from stat import * -import genericpath -from genericpath import * -from genericpath import _unicode - -__all__ = ["normcase","isabs","join","splitdrive","split","splitext", - "basename","dirname","commonprefix","getsize","getmtime", - "getatime","getctime", "islink","exists","lexists","isdir","isfile", - "walk","expanduser","expandvars","normpath","abspath", - "curdir","pardir","sep","pathsep","defpath","altsep","extsep", - "devnull","realpath","supports_unicode_filenames"] - -# strings representing various path-related bits and pieces -curdir = ':' -pardir = '::' -extsep = '.' -sep = ':' -pathsep = '\n' -defpath = ':' -altsep = None -devnull = 'Dev:Null' - -# Normalize the case of a pathname. Dummy in Posix, but .lower() here. - -def normcase(path): - return path.lower() - - -def isabs(s): - """Return true if a path is absolute. - On the Mac, relative paths begin with a colon, - but as a special case, paths with no colons at all are also relative. - Anything else is absolute (the string up to the first colon is the - volume name).""" - - return ':' in s and s[0] != ':' - - -def join(s, *p): - path = s - for t in p: - if (not path) or isabs(t): - path = t - continue - if t[:1] == ':': - t = t[1:] - if ':' not in path: - path = ':' + path - if path[-1:] != ':': - path = path + ':' - path = path + t - return path - - -def split(s): - """Split a pathname into two parts: the directory leading up to the final - bit, and the basename (the filename, without colons, in that directory). 
- The result (s, t) is such that join(s, t) yields the original argument.""" - - if ':' not in s: return '', s - colon = 0 - for i in range(len(s)): - if s[i] == ':': colon = i + 1 - path, file = s[:colon-1], s[colon:] - if path and not ':' in path: - path = path + ':' - return path, file - - -def splitext(p): - return genericpath._splitext(p, sep, altsep, extsep) -splitext.__doc__ = genericpath._splitext.__doc__ - -def splitdrive(p): - """Split a pathname into a drive specification and the rest of the - path. Useful on DOS/Windows/NT; on the Mac, the drive is always - empty (don't use the volume name -- it doesn't have the same - syntactic and semantic oddities as DOS drive letters, such as there - being a separate current directory per drive).""" - - return '', p - - -# Short interfaces to split() - -def dirname(s): return split(s)[0] -def basename(s): return split(s)[1] - -def ismount(s): - if not isabs(s): - return False - components = split(s) - return len(components) == 2 and components[1] == '' - -def islink(s): - """Return true if the pathname refers to a symbolic link.""" - - try: - import Carbon.File - return Carbon.File.ResolveAliasFile(s, 0)[2] - except: - return False - -# Is `stat`/`lstat` a meaningful difference on the Mac? This is safe in any -# case. - -def lexists(path): - """Test whether a path exists. Returns True for broken symbolic links""" - - try: - st = os.lstat(path) - except os.error: - return False - return True - -def expandvars(path): - """Dummy to retain interface-compatibility with other operating systems.""" - return path - - -def expanduser(path): - """Dummy to retain interface-compatibility with other operating systems.""" - return path - -class norm_error(Exception): - """Path cannot be normalized""" - -def normpath(s): - """Normalize a pathname. 
Will return the same result for - equivalent paths.""" - - if ":" not in s: - return ":"+s - - comps = s.split(":") - i = 1 - while i < len(comps)-1: - if comps[i] == "" and comps[i-1] != "": - if i > 1: - del comps[i-1:i+1] - i = i - 1 - else: - # best way to handle this is to raise an exception - raise norm_error, 'Cannot use :: immediately after volume name' - else: - i = i + 1 - - s = ":".join(comps) - - # remove trailing ":" except for ":" and "Volume:" - if s[-1] == ":" and len(comps) > 2 and s != ":"*len(s): - s = s[:-1] - return s - - -def walk(top, func, arg): - """Directory tree walk with callback function. - - For each directory in the directory tree rooted at top (including top - itself, but excluding '.' and '..'), call func(arg, dirname, fnames). - dirname is the name of the directory, and fnames a list of the names of - the files and subdirectories in dirname (excluding '.' and '..'). func - may modify the fnames list in-place (e.g. via del or slice assignment), - and walk will only recurse into the subdirectories whose names remain in - fnames; this can be used to implement a filter, or to impose a specific - order of visiting. No semantics are defined for, or required of, arg, - beyond that arg is always passed to func. It can be used, e.g., to pass - a filename pattern, or a mutable object designed to accumulate - statistics. 
Passing None for arg is common.""" - warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.", - stacklevel=2) - try: - names = os.listdir(top) - except os.error: - return - func(arg, top, names) - for name in names: - name = join(top, name) - if isdir(name) and not islink(name): - walk(name, func, arg) - - -def abspath(path): - """Return an absolute path.""" - if not isabs(path): - if isinstance(path, _unicode): - cwd = os.getcwdu() - else: - cwd = os.getcwd() - path = join(cwd, path) - return normpath(path) - -# realpath is a no-op on systems without islink support -def realpath(path): - path = abspath(path) - try: - import Carbon.File - except ImportError: - return path - if not path: - return path - components = path.split(':') - path = components[0] + ':' - for c in components[1:]: - path = join(path, c) - try: - path = Carbon.File.FSResolveAliasFile(path, 1)[0].as_pathname() - except Carbon.File.Error: - pass - return path - -supports_unicode_filenames = True diff --git a/python/Lib/macurl2path.py b/python/Lib/macurl2path.py deleted file mode 100755 index 6f8260f104..0000000000 --- a/python/Lib/macurl2path.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Macintosh-specific module for conversion between pathnames and URLs. - -Do not import directly; use urllib instead.""" - -import urllib -import os - -__all__ = ["url2pathname","pathname2url"] - -def url2pathname(pathname): - """OS-specific conversion from a relative URL of the 'file' scheme - to a file system path; not recommended for general use.""" - # - # XXXX The .. handling should be fixed... - # - tp = urllib.splittype(pathname)[0] - if tp and tp != 'file': - raise RuntimeError, 'Cannot convert non-local URL to pathname' - # Turn starting /// into /, an empty hostname means current host - if pathname[:3] == '///': - pathname = pathname[2:] - elif pathname[:2] == '//': - raise RuntimeError, 'Cannot convert non-local URL to pathname' - components = pathname.split('/') - # Remove . and embedded .. 
- i = 0 - while i < len(components): - if components[i] == '.': - del components[i] - elif components[i] == '..' and i > 0 and \ - components[i-1] not in ('', '..'): - del components[i-1:i+1] - i = i-1 - elif components[i] == '' and i > 0 and components[i-1] != '': - del components[i] - else: - i = i+1 - if not components[0]: - # Absolute unix path, don't start with colon - rv = ':'.join(components[1:]) - else: - # relative unix path, start with colon. First replace - # leading .. by empty strings (giving ::file) - i = 0 - while i < len(components) and components[i] == '..': - components[i] = '' - i = i + 1 - rv = ':' + ':'.join(components) - # and finally unquote slashes and other funny characters - return urllib.unquote(rv) - -def pathname2url(pathname): - """OS-specific conversion from a file system path to a relative URL - of the 'file' scheme; not recommended for general use.""" - if '/' in pathname: - raise RuntimeError, "Cannot convert pathname containing slashes" - components = pathname.split(':') - # Remove empty first and/or last component - if components[0] == '': - del components[0] - if components[-1] == '': - del components[-1] - # Replace empty string ('::') by .. (will result in '/../' later) - for i in range(len(components)): - if components[i] == '': - components[i] = '..' 
- # Truncate names longer than 31 bytes - components = map(_pncomp2url, components) - - if os.path.isabs(pathname): - return '/' + '/'.join(components) - else: - return '/'.join(components) - -def _pncomp2url(component): - component = urllib.quote(component[:31], safe='') # We want to quote slashes - return component diff --git a/python/Lib/mailbox.py b/python/Lib/mailbox.py deleted file mode 100755 index b1e5faafe9..0000000000 --- a/python/Lib/mailbox.py +++ /dev/null @@ -1,2240 +0,0 @@ -"""Read/write support for Maildir, mbox, MH, Babyl, and MMDF mailboxes.""" - -# Notes for authors of new mailbox subclasses: -# -# Remember to fsync() changes to disk before closing a modified file -# or returning from a flush() method. See functions _sync_flush() and -# _sync_close(). - -import sys -import os -import time -import calendar -import socket -import errno -import copy -import email -import email.message -import email.generator -import StringIO -try: - if sys.platform == 'os2emx': - # OS/2 EMX fcntl() not adequate - raise ImportError - import fcntl -except ImportError: - fcntl = None - -import warnings -with warnings.catch_warnings(): - if sys.py3kwarning: - warnings.filterwarnings("ignore", ".*rfc822 has been removed", - DeprecationWarning) - import rfc822 - -__all__ = [ 'Mailbox', 'Maildir', 'mbox', 'MH', 'Babyl', 'MMDF', - 'Message', 'MaildirMessage', 'mboxMessage', 'MHMessage', - 'BabylMessage', 'MMDFMessage', 'UnixMailbox', - 'PortableUnixMailbox', 'MmdfMailbox', 'MHMailbox', 'BabylMailbox' ] - -class Mailbox: - """A group of messages in a particular place.""" - - def __init__(self, path, factory=None, create=True): - """Initialize a Mailbox instance.""" - self._path = os.path.abspath(os.path.expanduser(path)) - self._factory = factory - - def add(self, message): - """Add message and return assigned key.""" - raise NotImplementedError('Method must be implemented by subclass') - - def remove(self, key): - """Remove the keyed message; raise KeyError if it doesn't 
exist.""" - raise NotImplementedError('Method must be implemented by subclass') - - def __delitem__(self, key): - self.remove(key) - - def discard(self, key): - """If the keyed message exists, remove it.""" - try: - self.remove(key) - except KeyError: - pass - - def __setitem__(self, key, message): - """Replace the keyed message; raise KeyError if it doesn't exist.""" - raise NotImplementedError('Method must be implemented by subclass') - - def get(self, key, default=None): - """Return the keyed message, or default if it doesn't exist.""" - try: - return self.__getitem__(key) - except KeyError: - return default - - def __getitem__(self, key): - """Return the keyed message; raise KeyError if it doesn't exist.""" - if not self._factory: - return self.get_message(key) - else: - return self._factory(self.get_file(key)) - - def get_message(self, key): - """Return a Message representation or raise a KeyError.""" - raise NotImplementedError('Method must be implemented by subclass') - - def get_string(self, key): - """Return a string representation or raise a KeyError.""" - raise NotImplementedError('Method must be implemented by subclass') - - def get_file(self, key): - """Return a file-like representation or raise a KeyError.""" - raise NotImplementedError('Method must be implemented by subclass') - - def iterkeys(self): - """Return an iterator over keys.""" - raise NotImplementedError('Method must be implemented by subclass') - - def keys(self): - """Return a list of keys.""" - return list(self.iterkeys()) - - def itervalues(self): - """Return an iterator over all messages.""" - for key in self.iterkeys(): - try: - value = self[key] - except KeyError: - continue - yield value - - def __iter__(self): - return self.itervalues() - - def values(self): - """Return a list of messages. 
Memory intensive.""" - return list(self.itervalues()) - - def iteritems(self): - """Return an iterator over (key, message) tuples.""" - for key in self.iterkeys(): - try: - value = self[key] - except KeyError: - continue - yield (key, value) - - def items(self): - """Return a list of (key, message) tuples. Memory intensive.""" - return list(self.iteritems()) - - def has_key(self, key): - """Return True if the keyed message exists, False otherwise.""" - raise NotImplementedError('Method must be implemented by subclass') - - def __contains__(self, key): - return self.has_key(key) - - def __len__(self): - """Return a count of messages in the mailbox.""" - raise NotImplementedError('Method must be implemented by subclass') - - def clear(self): - """Delete all messages.""" - for key in self.iterkeys(): - self.discard(key) - - def pop(self, key, default=None): - """Delete the keyed message and return it, or default.""" - try: - result = self[key] - except KeyError: - return default - self.discard(key) - return result - - def popitem(self): - """Delete an arbitrary (key, message) pair and return it.""" - for key in self.iterkeys(): - return (key, self.pop(key)) # This is only run once. 
- else: - raise KeyError('No messages in mailbox') - - def update(self, arg=None): - """Change the messages that correspond to certain keys.""" - if hasattr(arg, 'iteritems'): - source = arg.iteritems() - elif hasattr(arg, 'items'): - source = arg.items() - else: - source = arg - bad_key = False - for key, message in source: - try: - self[key] = message - except KeyError: - bad_key = True - if bad_key: - raise KeyError('No message with key(s)') - - def flush(self): - """Write any pending changes to the disk.""" - raise NotImplementedError('Method must be implemented by subclass') - - def lock(self): - """Lock the mailbox.""" - raise NotImplementedError('Method must be implemented by subclass') - - def unlock(self): - """Unlock the mailbox if it is locked.""" - raise NotImplementedError('Method must be implemented by subclass') - - def close(self): - """Flush and close the mailbox.""" - raise NotImplementedError('Method must be implemented by subclass') - - # Whether each message must end in a newline - _append_newline = False - - def _dump_message(self, message, target, mangle_from_=False): - # Most files are opened in binary mode to allow predictable seeking. - # To get native line endings on disk, the user-friendly \n line endings - # used in strings and by email.Message are translated here. 
- """Dump message contents to target file.""" - if isinstance(message, email.message.Message): - buffer = StringIO.StringIO() - gen = email.generator.Generator(buffer, mangle_from_, 0) - gen.flatten(message) - buffer.seek(0) - data = buffer.read().replace('\n', os.linesep) - target.write(data) - if self._append_newline and not data.endswith(os.linesep): - # Make sure the message ends with a newline - target.write(os.linesep) - elif isinstance(message, str): - if mangle_from_: - message = message.replace('\nFrom ', '\n>From ') - message = message.replace('\n', os.linesep) - target.write(message) - if self._append_newline and not message.endswith(os.linesep): - # Make sure the message ends with a newline - target.write(os.linesep) - elif hasattr(message, 'read'): - lastline = None - while True: - line = message.readline() - if line == '': - break - if mangle_from_ and line.startswith('From '): - line = '>From ' + line[5:] - line = line.replace('\n', os.linesep) - target.write(line) - lastline = line - if self._append_newline and lastline and not lastline.endswith(os.linesep): - # Make sure the message ends with a newline - target.write(os.linesep) - else: - raise TypeError('Invalid message type: %s' % type(message)) - - -class Maildir(Mailbox): - """A qmail-style Maildir mailbox.""" - - colon = ':' - - def __init__(self, dirname, factory=rfc822.Message, create=True): - """Initialize a Maildir instance.""" - Mailbox.__init__(self, dirname, factory, create) - self._paths = { - 'tmp': os.path.join(self._path, 'tmp'), - 'new': os.path.join(self._path, 'new'), - 'cur': os.path.join(self._path, 'cur'), - } - if not os.path.exists(self._path): - if create: - os.mkdir(self._path, 0700) - for path in self._paths.values(): - os.mkdir(path, 0o700) - else: - raise NoSuchMailboxError(self._path) - self._toc = {} - self._toc_mtimes = {'cur': 0, 'new': 0} - self._last_read = 0 # Records last time we read cur/new - self._skewfactor = 0.1 # Adjust if os/fs clocks are skewing - - def 
add(self, message): - """Add message and return assigned key.""" - tmp_file = self._create_tmp() - try: - self._dump_message(message, tmp_file) - except BaseException: - tmp_file.close() - os.remove(tmp_file.name) - raise - _sync_close(tmp_file) - if isinstance(message, MaildirMessage): - subdir = message.get_subdir() - suffix = self.colon + message.get_info() - if suffix == self.colon: - suffix = '' - else: - subdir = 'new' - suffix = '' - uniq = os.path.basename(tmp_file.name).split(self.colon)[0] - dest = os.path.join(self._path, subdir, uniq + suffix) - if isinstance(message, MaildirMessage): - os.utime(tmp_file.name, - (os.path.getatime(tmp_file.name), message.get_date())) - # No file modification should be done after the file is moved to its - # final position in order to prevent race conditions with changes - # from other programs - try: - if hasattr(os, 'link'): - os.link(tmp_file.name, dest) - os.remove(tmp_file.name) - else: - os.rename(tmp_file.name, dest) - except OSError, e: - os.remove(tmp_file.name) - if e.errno == errno.EEXIST: - raise ExternalClashError('Name clash with existing message: %s' - % dest) - else: - raise - return uniq - - def remove(self, key): - """Remove the keyed message; raise KeyError if it doesn't exist.""" - os.remove(os.path.join(self._path, self._lookup(key))) - - def discard(self, key): - """If the keyed message exists, remove it.""" - # This overrides an inapplicable implementation in the superclass. - try: - self.remove(key) - except KeyError: - pass - except OSError, e: - if e.errno != errno.ENOENT: - raise - - def __setitem__(self, key, message): - """Replace the keyed message; raise KeyError if it doesn't exist.""" - old_subpath = self._lookup(key) - temp_key = self.add(message) - temp_subpath = self._lookup(temp_key) - if isinstance(message, MaildirMessage): - # temp's subdir and suffix were specified by message. - dominant_subpath = temp_subpath - else: - # temp's subdir and suffix were defaults from add(). 
- dominant_subpath = old_subpath - subdir = os.path.dirname(dominant_subpath) - if self.colon in dominant_subpath: - suffix = self.colon + dominant_subpath.split(self.colon)[-1] - else: - suffix = '' - self.discard(key) - tmp_path = os.path.join(self._path, temp_subpath) - new_path = os.path.join(self._path, subdir, key + suffix) - if isinstance(message, MaildirMessage): - os.utime(tmp_path, - (os.path.getatime(tmp_path), message.get_date())) - # No file modification should be done after the file is moved to its - # final position in order to prevent race conditions with changes - # from other programs - os.rename(tmp_path, new_path) - - def get_message(self, key): - """Return a Message representation or raise a KeyError.""" - subpath = self._lookup(key) - f = open(os.path.join(self._path, subpath), 'r') - try: - if self._factory: - msg = self._factory(f) - else: - msg = MaildirMessage(f) - finally: - f.close() - subdir, name = os.path.split(subpath) - msg.set_subdir(subdir) - if self.colon in name: - msg.set_info(name.split(self.colon)[-1]) - msg.set_date(os.path.getmtime(os.path.join(self._path, subpath))) - return msg - - def get_string(self, key): - """Return a string representation or raise a KeyError.""" - f = open(os.path.join(self._path, self._lookup(key)), 'r') - try: - return f.read() - finally: - f.close() - - def get_file(self, key): - """Return a file-like representation or raise a KeyError.""" - f = open(os.path.join(self._path, self._lookup(key)), 'rb') - return _ProxyFile(f) - - def iterkeys(self): - """Return an iterator over keys.""" - self._refresh() - for key in self._toc: - try: - self._lookup(key) - except KeyError: - continue - yield key - - def has_key(self, key): - """Return True if the keyed message exists, False otherwise.""" - self._refresh() - return key in self._toc - - def __len__(self): - """Return a count of messages in the mailbox.""" - self._refresh() - return len(self._toc) - - def flush(self): - """Write any pending changes to 
disk.""" - # Maildir changes are always written immediately, so there's nothing - # to do. - pass - - def lock(self): - """Lock the mailbox.""" - return - - def unlock(self): - """Unlock the mailbox if it is locked.""" - return - - def close(self): - """Flush and close the mailbox.""" - return - - def list_folders(self): - """Return a list of folder names.""" - result = [] - for entry in os.listdir(self._path): - if len(entry) > 1 and entry[0] == '.' and \ - os.path.isdir(os.path.join(self._path, entry)): - result.append(entry[1:]) - return result - - def get_folder(self, folder): - """Return a Maildir instance for the named folder.""" - return Maildir(os.path.join(self._path, '.' + folder), - factory=self._factory, - create=False) - - def add_folder(self, folder): - """Create a folder and return a Maildir instance representing it.""" - path = os.path.join(self._path, '.' + folder) - result = Maildir(path, factory=self._factory) - maildirfolder_path = os.path.join(path, 'maildirfolder') - if not os.path.exists(maildirfolder_path): - os.close(os.open(maildirfolder_path, os.O_CREAT | os.O_WRONLY, - 0666)) - return result - - def remove_folder(self, folder): - """Delete the named folder, which must be empty.""" - path = os.path.join(self._path, '.' 
+ folder) - for entry in os.listdir(os.path.join(path, 'new')) + \ - os.listdir(os.path.join(path, 'cur')): - if len(entry) < 1 or entry[0] != '.': - raise NotEmptyError('Folder contains message(s): %s' % folder) - for entry in os.listdir(path): - if entry != 'new' and entry != 'cur' and entry != 'tmp' and \ - os.path.isdir(os.path.join(path, entry)): - raise NotEmptyError("Folder contains subdirectory '%s': %s" % - (folder, entry)) - for root, dirs, files in os.walk(path, topdown=False): - for entry in files: - os.remove(os.path.join(root, entry)) - for entry in dirs: - os.rmdir(os.path.join(root, entry)) - os.rmdir(path) - - def clean(self): - """Delete old files in "tmp".""" - now = time.time() - for entry in os.listdir(os.path.join(self._path, 'tmp')): - path = os.path.join(self._path, 'tmp', entry) - if now - os.path.getatime(path) > 129600: # 60 * 60 * 36 - os.remove(path) - - _count = 1 # This is used to generate unique file names. - - def _create_tmp(self): - """Create a file in the tmp subdirectory and open and return it.""" - now = time.time() - hostname = socket.gethostname() - if '/' in hostname: - hostname = hostname.replace('/', r'\057') - if ':' in hostname: - hostname = hostname.replace(':', r'\072') - uniq = "%s.M%sP%sQ%s.%s" % (int(now), int(now % 1 * 1e6), os.getpid(), - Maildir._count, hostname) - path = os.path.join(self._path, 'tmp', uniq) - try: - os.stat(path) - except OSError, e: - if e.errno == errno.ENOENT: - Maildir._count += 1 - try: - return _create_carefully(path) - except OSError, e: - if e.errno != errno.EEXIST: - raise - else: - raise - - # Fall through to here if stat succeeded or open raised EEXIST. 
- raise ExternalClashError('Name clash prevented file creation: %s' % - path) - - def _refresh(self): - """Update table of contents mapping.""" - # If it has been less than two seconds since the last _refresh() call, - # we have to unconditionally re-read the mailbox just in case it has - # been modified, because os.path.mtime() has a 2 sec resolution in the - # most common worst case (FAT) and a 1 sec resolution typically. This - # results in a few unnecessary re-reads when _refresh() is called - # multiple times in that interval, but once the clock ticks over, we - # will only re-read as needed. Because the filesystem might be being - # served by an independent system with its own clock, we record and - # compare with the mtimes from the filesystem. Because the other - # system's clock might be skewing relative to our clock, we add an - # extra delta to our wait. The default is one tenth second, but is an - # instance variable and so can be adjusted if dealing with a - # particularly skewed or irregular system. 
- if time.time() - self._last_read > 2 + self._skewfactor: - refresh = False - for subdir in self._toc_mtimes: - mtime = os.path.getmtime(self._paths[subdir]) - if mtime > self._toc_mtimes[subdir]: - refresh = True - self._toc_mtimes[subdir] = mtime - if not refresh: - return - # Refresh toc - self._toc = {} - for subdir in self._toc_mtimes: - path = self._paths[subdir] - for entry in os.listdir(path): - p = os.path.join(path, entry) - if os.path.isdir(p): - continue - uniq = entry.split(self.colon)[0] - self._toc[uniq] = os.path.join(subdir, entry) - self._last_read = time.time() - - def _lookup(self, key): - """Use TOC to return subpath for given key, or raise a KeyError.""" - try: - if os.path.exists(os.path.join(self._path, self._toc[key])): - return self._toc[key] - except KeyError: - pass - self._refresh() - try: - return self._toc[key] - except KeyError: - raise KeyError('No message with key: %s' % key) - - # This method is for backward compatibility only. - def next(self): - """Return the next message in a one-time iteration.""" - if not hasattr(self, '_onetime_keys'): - self._onetime_keys = self.iterkeys() - while True: - try: - return self[self._onetime_keys.next()] - except StopIteration: - return None - except KeyError: - continue - - -class _singlefileMailbox(Mailbox): - """A single-file mailbox.""" - - def __init__(self, path, factory=None, create=True): - """Initialize a single-file mailbox.""" - Mailbox.__init__(self, path, factory, create) - try: - f = open(self._path, 'rb+') - except IOError, e: - if e.errno == errno.ENOENT: - if create: - f = open(self._path, 'wb+') - else: - raise NoSuchMailboxError(self._path) - elif e.errno in (errno.EACCES, errno.EROFS): - f = open(self._path, 'rb') - else: - raise - self._file = f - self._toc = None - self._next_key = 0 - self._pending = False # No changes require rewriting the file. 
- self._pending_sync = False # No need to sync the file - self._locked = False - self._file_length = None # Used to record mailbox size - - def add(self, message): - """Add message and return assigned key.""" - self._lookup() - self._toc[self._next_key] = self._append_message(message) - self._next_key += 1 - # _append_message appends the message to the mailbox file. We - # don't need a full rewrite + rename, sync is enough. - self._pending_sync = True - return self._next_key - 1 - - def remove(self, key): - """Remove the keyed message; raise KeyError if it doesn't exist.""" - self._lookup(key) - del self._toc[key] - self._pending = True - - def __setitem__(self, key, message): - """Replace the keyed message; raise KeyError if it doesn't exist.""" - self._lookup(key) - self._toc[key] = self._append_message(message) - self._pending = True - - def iterkeys(self): - """Return an iterator over keys.""" - self._lookup() - for key in self._toc.keys(): - yield key - - def has_key(self, key): - """Return True if the keyed message exists, False otherwise.""" - self._lookup() - return key in self._toc - - def __len__(self): - """Return a count of messages in the mailbox.""" - self._lookup() - return len(self._toc) - - def lock(self): - """Lock the mailbox.""" - if not self._locked: - _lock_file(self._file) - self._locked = True - - def unlock(self): - """Unlock the mailbox if it is locked.""" - if self._locked: - _unlock_file(self._file) - self._locked = False - - def flush(self): - """Write any pending changes to disk.""" - if not self._pending: - if self._pending_sync: - # Messages have only been added, so syncing the file - # is enough. - _sync_flush(self._file) - self._pending_sync = False - return - - # In order to be writing anything out at all, self._toc must - # already have been generated (and presumably has been modified - # by adding or deleting an item). 
- assert self._toc is not None - - # Check length of self._file; if it's changed, some other process - # has modified the mailbox since we scanned it. - self._file.seek(0, 2) - cur_len = self._file.tell() - if cur_len != self._file_length: - raise ExternalClashError('Size of mailbox file changed ' - '(expected %i, found %i)' % - (self._file_length, cur_len)) - - new_file = _create_temporary(self._path) - try: - new_toc = {} - self._pre_mailbox_hook(new_file) - for key in sorted(self._toc.keys()): - start, stop = self._toc[key] - self._file.seek(start) - self._pre_message_hook(new_file) - new_start = new_file.tell() - while True: - buffer = self._file.read(min(4096, - stop - self._file.tell())) - if buffer == '': - break - new_file.write(buffer) - new_toc[key] = (new_start, new_file.tell()) - self._post_message_hook(new_file) - self._file_length = new_file.tell() - except: - new_file.close() - os.remove(new_file.name) - raise - _sync_close(new_file) - # self._file is about to get replaced, so no need to sync. 
- self._file.close() - # Make sure the new file's mode is the same as the old file's - mode = os.stat(self._path).st_mode - os.chmod(new_file.name, mode) - try: - os.rename(new_file.name, self._path) - except OSError, e: - if e.errno == errno.EEXIST or \ - (os.name == 'os2' and e.errno == errno.EACCES): - os.remove(self._path) - os.rename(new_file.name, self._path) - else: - raise - self._file = open(self._path, 'rb+') - self._toc = new_toc - self._pending = False - self._pending_sync = False - if self._locked: - _lock_file(self._file, dotlock=False) - - def _pre_mailbox_hook(self, f): - """Called before writing the mailbox to file f.""" - return - - def _pre_message_hook(self, f): - """Called before writing each message to file f.""" - return - - def _post_message_hook(self, f): - """Called after writing each message to file f.""" - return - - def close(self): - """Flush and close the mailbox.""" - try: - self.flush() - finally: - try: - if self._locked: - self.unlock() - finally: - self._file.close() # Sync has been done by self.flush() above. - - def _lookup(self, key=None): - """Return (start, stop) or raise KeyError.""" - if self._toc is None: - self._generate_toc() - if key is not None: - try: - return self._toc[key] - except KeyError: - raise KeyError('No message with key: %s' % key) - - def _append_message(self, message): - """Append message to mailbox and return (start, stop) offsets.""" - self._file.seek(0, 2) - before = self._file.tell() - if len(self._toc) == 0 and not self._pending: - # This is the first message, and the _pre_mailbox_hook - # hasn't yet been called. If self._pending is True, - # messages have been removed, so _pre_mailbox_hook must - # have been called already. 
- self._pre_mailbox_hook(self._file) - try: - self._pre_message_hook(self._file) - offsets = self._install_message(message) - self._post_message_hook(self._file) - except BaseException: - self._file.truncate(before) - raise - self._file.flush() - self._file_length = self._file.tell() # Record current length of mailbox - return offsets - - - -class _mboxMMDF(_singlefileMailbox): - """An mbox or MMDF mailbox.""" - - _mangle_from_ = True - - def get_message(self, key): - """Return a Message representation or raise a KeyError.""" - start, stop = self._lookup(key) - self._file.seek(start) - from_line = self._file.readline().replace(os.linesep, '') - string = self._file.read(stop - self._file.tell()) - msg = self._message_factory(string.replace(os.linesep, '\n')) - msg.set_from(from_line[5:]) - return msg - - def get_string(self, key, from_=False): - """Return a string representation or raise a KeyError.""" - start, stop = self._lookup(key) - self._file.seek(start) - if not from_: - self._file.readline() - string = self._file.read(stop - self._file.tell()) - return string.replace(os.linesep, '\n') - - def get_file(self, key, from_=False): - """Return a file-like representation or raise a KeyError.""" - start, stop = self._lookup(key) - self._file.seek(start) - if not from_: - self._file.readline() - return _PartialFile(self._file, self._file.tell(), stop) - - def _install_message(self, message): - """Format a message and blindly write to self._file.""" - from_line = None - if isinstance(message, str) and message.startswith('From '): - newline = message.find('\n') - if newline != -1: - from_line = message[:newline] - message = message[newline + 1:] - else: - from_line = message - message = '' - elif isinstance(message, _mboxMMDFMessage): - from_line = 'From ' + message.get_from() - elif isinstance(message, email.message.Message): - from_line = message.get_unixfrom() # May be None. 
- if from_line is None: - from_line = 'From MAILER-DAEMON %s' % time.asctime(time.gmtime()) - start = self._file.tell() - self._file.write(from_line + os.linesep) - self._dump_message(message, self._file, self._mangle_from_) - stop = self._file.tell() - return (start, stop) - - -class mbox(_mboxMMDF): - """A classic mbox mailbox.""" - - _mangle_from_ = True - - # All messages must end in a newline character, and - # _post_message_hooks outputs an empty line between messages. - _append_newline = True - - def __init__(self, path, factory=None, create=True): - """Initialize an mbox mailbox.""" - self._message_factory = mboxMessage - _mboxMMDF.__init__(self, path, factory, create) - - def _post_message_hook(self, f): - """Called after writing each message to file f.""" - f.write(os.linesep) - - def _generate_toc(self): - """Generate key-to-(start, stop) table of contents.""" - starts, stops = [], [] - last_was_empty = False - self._file.seek(0) - while True: - line_pos = self._file.tell() - line = self._file.readline() - if line.startswith('From '): - if len(stops) < len(starts): - if last_was_empty: - stops.append(line_pos - len(os.linesep)) - else: - # The last line before the "From " line wasn't - # blank, but we consider it a start of a - # message anyway. 
- stops.append(line_pos) - starts.append(line_pos) - last_was_empty = False - elif not line: - if last_was_empty: - stops.append(line_pos - len(os.linesep)) - else: - stops.append(line_pos) - break - elif line == os.linesep: - last_was_empty = True - else: - last_was_empty = False - self._toc = dict(enumerate(zip(starts, stops))) - self._next_key = len(self._toc) - self._file_length = self._file.tell() - - -class MMDF(_mboxMMDF): - """An MMDF mailbox.""" - - def __init__(self, path, factory=None, create=True): - """Initialize an MMDF mailbox.""" - self._message_factory = MMDFMessage - _mboxMMDF.__init__(self, path, factory, create) - - def _pre_message_hook(self, f): - """Called before writing each message to file f.""" - f.write('\001\001\001\001' + os.linesep) - - def _post_message_hook(self, f): - """Called after writing each message to file f.""" - f.write(os.linesep + '\001\001\001\001' + os.linesep) - - def _generate_toc(self): - """Generate key-to-(start, stop) table of contents.""" - starts, stops = [], [] - self._file.seek(0) - next_pos = 0 - while True: - line_pos = next_pos - line = self._file.readline() - next_pos = self._file.tell() - if line.startswith('\001\001\001\001' + os.linesep): - starts.append(next_pos) - while True: - line_pos = next_pos - line = self._file.readline() - next_pos = self._file.tell() - if line == '\001\001\001\001' + os.linesep: - stops.append(line_pos - len(os.linesep)) - break - elif line == '': - stops.append(line_pos) - break - elif line == '': - break - self._toc = dict(enumerate(zip(starts, stops))) - self._next_key = len(self._toc) - self._file.seek(0, 2) - self._file_length = self._file.tell() - - -class MH(Mailbox): - """An MH mailbox.""" - - def __init__(self, path, factory=None, create=True): - """Initialize an MH instance.""" - Mailbox.__init__(self, path, factory, create) - if not os.path.exists(self._path): - if create: - os.mkdir(self._path, 0700) - os.close(os.open(os.path.join(self._path, '.mh_sequences'), - 
os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0600)) - else: - raise NoSuchMailboxError(self._path) - self._locked = False - - def add(self, message): - """Add message and return assigned key.""" - keys = self.keys() - if len(keys) == 0: - new_key = 1 - else: - new_key = max(keys) + 1 - new_path = os.path.join(self._path, str(new_key)) - f = _create_carefully(new_path) - closed = False - try: - if self._locked: - _lock_file(f) - try: - try: - self._dump_message(message, f) - except BaseException: - # Unlock and close so it can be deleted on Windows - if self._locked: - _unlock_file(f) - _sync_close(f) - closed = True - os.remove(new_path) - raise - if isinstance(message, MHMessage): - self._dump_sequences(message, new_key) - finally: - if self._locked: - _unlock_file(f) - finally: - if not closed: - _sync_close(f) - return new_key - - def remove(self, key): - """Remove the keyed message; raise KeyError if it doesn't exist.""" - path = os.path.join(self._path, str(key)) - try: - f = open(path, 'rb+') - except IOError, e: - if e.errno == errno.ENOENT: - raise KeyError('No message with key: %s' % key) - else: - raise - else: - f.close() - os.remove(path) - - def __setitem__(self, key, message): - """Replace the keyed message; raise KeyError if it doesn't exist.""" - path = os.path.join(self._path, str(key)) - try: - f = open(path, 'rb+') - except IOError, e: - if e.errno == errno.ENOENT: - raise KeyError('No message with key: %s' % key) - else: - raise - try: - if self._locked: - _lock_file(f) - try: - os.close(os.open(path, os.O_WRONLY | os.O_TRUNC)) - self._dump_message(message, f) - if isinstance(message, MHMessage): - self._dump_sequences(message, key) - finally: - if self._locked: - _unlock_file(f) - finally: - _sync_close(f) - - def get_message(self, key): - """Return a Message representation or raise a KeyError.""" - try: - if self._locked: - f = open(os.path.join(self._path, str(key)), 'r+') - else: - f = open(os.path.join(self._path, str(key)), 'r') - except IOError, 
e: - if e.errno == errno.ENOENT: - raise KeyError('No message with key: %s' % key) - else: - raise - try: - if self._locked: - _lock_file(f) - try: - msg = MHMessage(f) - finally: - if self._locked: - _unlock_file(f) - finally: - f.close() - for name, key_list in self.get_sequences().iteritems(): - if key in key_list: - msg.add_sequence(name) - return msg - - def get_string(self, key): - """Return a string representation or raise a KeyError.""" - try: - if self._locked: - f = open(os.path.join(self._path, str(key)), 'r+') - else: - f = open(os.path.join(self._path, str(key)), 'r') - except IOError, e: - if e.errno == errno.ENOENT: - raise KeyError('No message with key: %s' % key) - else: - raise - try: - if self._locked: - _lock_file(f) - try: - return f.read() - finally: - if self._locked: - _unlock_file(f) - finally: - f.close() - - def get_file(self, key): - """Return a file-like representation or raise a KeyError.""" - try: - f = open(os.path.join(self._path, str(key)), 'rb') - except IOError, e: - if e.errno == errno.ENOENT: - raise KeyError('No message with key: %s' % key) - else: - raise - return _ProxyFile(f) - - def iterkeys(self): - """Return an iterator over keys.""" - return iter(sorted(int(entry) for entry in os.listdir(self._path) - if entry.isdigit())) - - def has_key(self, key): - """Return True if the keyed message exists, False otherwise.""" - return os.path.exists(os.path.join(self._path, str(key))) - - def __len__(self): - """Return a count of messages in the mailbox.""" - return len(list(self.iterkeys())) - - def lock(self): - """Lock the mailbox.""" - if not self._locked: - self._file = open(os.path.join(self._path, '.mh_sequences'), 'rb+') - _lock_file(self._file) - self._locked = True - - def unlock(self): - """Unlock the mailbox if it is locked.""" - if self._locked: - _unlock_file(self._file) - _sync_close(self._file) - del self._file - self._locked = False - - def flush(self): - """Write any pending changes to the disk.""" - return - - 
def close(self): - """Flush and close the mailbox.""" - if self._locked: - self.unlock() - - def list_folders(self): - """Return a list of folder names.""" - result = [] - for entry in os.listdir(self._path): - if os.path.isdir(os.path.join(self._path, entry)): - result.append(entry) - return result - - def get_folder(self, folder): - """Return an MH instance for the named folder.""" - return MH(os.path.join(self._path, folder), - factory=self._factory, create=False) - - def add_folder(self, folder): - """Create a folder and return an MH instance representing it.""" - return MH(os.path.join(self._path, folder), - factory=self._factory) - - def remove_folder(self, folder): - """Delete the named folder, which must be empty.""" - path = os.path.join(self._path, folder) - entries = os.listdir(path) - if entries == ['.mh_sequences']: - os.remove(os.path.join(path, '.mh_sequences')) - elif entries == []: - pass - else: - raise NotEmptyError('Folder not empty: %s' % self._path) - os.rmdir(path) - - def get_sequences(self): - """Return a name-to-key-list dictionary to define each sequence.""" - results = {} - f = open(os.path.join(self._path, '.mh_sequences'), 'r') - try: - all_keys = set(self.keys()) - for line in f: - try: - name, contents = line.split(':') - keys = set() - for spec in contents.split(): - if spec.isdigit(): - keys.add(int(spec)) - else: - start, stop = (int(x) for x in spec.split('-')) - keys.update(range(start, stop + 1)) - results[name] = [key for key in sorted(keys) \ - if key in all_keys] - if len(results[name]) == 0: - del results[name] - except ValueError: - raise FormatError('Invalid sequence specification: %s' % - line.rstrip()) - finally: - f.close() - return results - - def set_sequences(self, sequences): - """Set sequences using the given name-to-key-list dictionary.""" - f = open(os.path.join(self._path, '.mh_sequences'), 'r+') - try: - os.close(os.open(f.name, os.O_WRONLY | os.O_TRUNC)) - for name, keys in sequences.iteritems(): - if 
len(keys) == 0: - continue - f.write('%s:' % name) - prev = None - completing = False - for key in sorted(set(keys)): - if key - 1 == prev: - if not completing: - completing = True - f.write('-') - elif completing: - completing = False - f.write('%s %s' % (prev, key)) - else: - f.write(' %s' % key) - prev = key - if completing: - f.write(str(prev) + '\n') - else: - f.write('\n') - finally: - _sync_close(f) - - def pack(self): - """Re-name messages to eliminate numbering gaps. Invalidates keys.""" - sequences = self.get_sequences() - prev = 0 - changes = [] - for key in self.iterkeys(): - if key - 1 != prev: - changes.append((key, prev + 1)) - if hasattr(os, 'link'): - os.link(os.path.join(self._path, str(key)), - os.path.join(self._path, str(prev + 1))) - os.unlink(os.path.join(self._path, str(key))) - else: - os.rename(os.path.join(self._path, str(key)), - os.path.join(self._path, str(prev + 1))) - prev += 1 - self._next_key = prev + 1 - if len(changes) == 0: - return - for name, key_list in sequences.items(): - for old, new in changes: - if old in key_list: - key_list[key_list.index(old)] = new - self.set_sequences(sequences) - - def _dump_sequences(self, message, key): - """Inspect a new MHMessage and update sequences appropriately.""" - pending_sequences = message.get_sequences() - all_sequences = self.get_sequences() - for name, key_list in all_sequences.iteritems(): - if name in pending_sequences: - key_list.append(key) - elif key in key_list: - del key_list[key_list.index(key)] - for sequence in pending_sequences: - if sequence not in all_sequences: - all_sequences[sequence] = [key] - self.set_sequences(all_sequences) - - -class Babyl(_singlefileMailbox): - """An Rmail-style Babyl mailbox.""" - - _special_labels = frozenset(('unseen', 'deleted', 'filed', 'answered', - 'forwarded', 'edited', 'resent')) - - def __init__(self, path, factory=None, create=True): - """Initialize a Babyl mailbox.""" - _singlefileMailbox.__init__(self, path, factory, create) - 
self._labels = {} - - def add(self, message): - """Add message and return assigned key.""" - key = _singlefileMailbox.add(self, message) - if isinstance(message, BabylMessage): - self._labels[key] = message.get_labels() - return key - - def remove(self, key): - """Remove the keyed message; raise KeyError if it doesn't exist.""" - _singlefileMailbox.remove(self, key) - if key in self._labels: - del self._labels[key] - - def __setitem__(self, key, message): - """Replace the keyed message; raise KeyError if it doesn't exist.""" - _singlefileMailbox.__setitem__(self, key, message) - if isinstance(message, BabylMessage): - self._labels[key] = message.get_labels() - - def get_message(self, key): - """Return a Message representation or raise a KeyError.""" - start, stop = self._lookup(key) - self._file.seek(start) - self._file.readline() # Skip '1,' line specifying labels. - original_headers = StringIO.StringIO() - while True: - line = self._file.readline() - if line == '*** EOOH ***' + os.linesep or line == '': - break - original_headers.write(line.replace(os.linesep, '\n')) - visible_headers = StringIO.StringIO() - while True: - line = self._file.readline() - if line == os.linesep or line == '': - break - visible_headers.write(line.replace(os.linesep, '\n')) - body = self._file.read(stop - self._file.tell()).replace(os.linesep, - '\n') - msg = BabylMessage(original_headers.getvalue() + body) - msg.set_visible(visible_headers.getvalue()) - if key in self._labels: - msg.set_labels(self._labels[key]) - return msg - - def get_string(self, key): - """Return a string representation or raise a KeyError.""" - start, stop = self._lookup(key) - self._file.seek(start) - self._file.readline() # Skip '1,' line specifying labels. 
- original_headers = StringIO.StringIO() - while True: - line = self._file.readline() - if line == '*** EOOH ***' + os.linesep or line == '': - break - original_headers.write(line.replace(os.linesep, '\n')) - while True: - line = self._file.readline() - if line == os.linesep or line == '': - break - return original_headers.getvalue() + \ - self._file.read(stop - self._file.tell()).replace(os.linesep, - '\n') - - def get_file(self, key): - """Return a file-like representation or raise a KeyError.""" - return StringIO.StringIO(self.get_string(key).replace('\n', - os.linesep)) - - def get_labels(self): - """Return a list of user-defined labels in the mailbox.""" - self._lookup() - labels = set() - for label_list in self._labels.values(): - labels.update(label_list) - labels.difference_update(self._special_labels) - return list(labels) - - def _generate_toc(self): - """Generate key-to-(start, stop) table of contents.""" - starts, stops = [], [] - self._file.seek(0) - next_pos = 0 - label_lists = [] - while True: - line_pos = next_pos - line = self._file.readline() - next_pos = self._file.tell() - if line == '\037\014' + os.linesep: - if len(stops) < len(starts): - stops.append(line_pos - len(os.linesep)) - starts.append(next_pos) - labels = [label.strip() for label - in self._file.readline()[1:].split(',') - if label.strip() != ''] - label_lists.append(labels) - elif line == '\037' or line == '\037' + os.linesep: - if len(stops) < len(starts): - stops.append(line_pos - len(os.linesep)) - elif line == '': - stops.append(line_pos - len(os.linesep)) - break - self._toc = dict(enumerate(zip(starts, stops))) - self._labels = dict(enumerate(label_lists)) - self._next_key = len(self._toc) - self._file.seek(0, 2) - self._file_length = self._file.tell() - - def _pre_mailbox_hook(self, f): - """Called before writing the mailbox to file f.""" - f.write('BABYL OPTIONS:%sVersion: 5%sLabels:%s%s\037' % - (os.linesep, os.linesep, ','.join(self.get_labels()), - os.linesep)) - - def 
_pre_message_hook(self, f): - """Called before writing each message to file f.""" - f.write('\014' + os.linesep) - - def _post_message_hook(self, f): - """Called after writing each message to file f.""" - f.write(os.linesep + '\037') - - def _install_message(self, message): - """Write message contents and return (start, stop).""" - start = self._file.tell() - if isinstance(message, BabylMessage): - special_labels = [] - labels = [] - for label in message.get_labels(): - if label in self._special_labels: - special_labels.append(label) - else: - labels.append(label) - self._file.write('1') - for label in special_labels: - self._file.write(', ' + label) - self._file.write(',,') - for label in labels: - self._file.write(' ' + label + ',') - self._file.write(os.linesep) - else: - self._file.write('1,,' + os.linesep) - if isinstance(message, email.message.Message): - orig_buffer = StringIO.StringIO() - orig_generator = email.generator.Generator(orig_buffer, False, 0) - orig_generator.flatten(message) - orig_buffer.seek(0) - while True: - line = orig_buffer.readline() - self._file.write(line.replace('\n', os.linesep)) - if line == '\n' or line == '': - break - self._file.write('*** EOOH ***' + os.linesep) - if isinstance(message, BabylMessage): - vis_buffer = StringIO.StringIO() - vis_generator = email.generator.Generator(vis_buffer, False, 0) - vis_generator.flatten(message.get_visible()) - while True: - line = vis_buffer.readline() - self._file.write(line.replace('\n', os.linesep)) - if line == '\n' or line == '': - break - else: - orig_buffer.seek(0) - while True: - line = orig_buffer.readline() - self._file.write(line.replace('\n', os.linesep)) - if line == '\n' or line == '': - break - while True: - buffer = orig_buffer.read(4096) # Buffer size is arbitrary. 
- if buffer == '': - break - self._file.write(buffer.replace('\n', os.linesep)) - elif isinstance(message, str): - body_start = message.find('\n\n') + 2 - if body_start - 2 != -1: - self._file.write(message[:body_start].replace('\n', - os.linesep)) - self._file.write('*** EOOH ***' + os.linesep) - self._file.write(message[:body_start].replace('\n', - os.linesep)) - self._file.write(message[body_start:].replace('\n', - os.linesep)) - else: - self._file.write('*** EOOH ***' + os.linesep + os.linesep) - self._file.write(message.replace('\n', os.linesep)) - elif hasattr(message, 'readline'): - original_pos = message.tell() - first_pass = True - while True: - line = message.readline() - self._file.write(line.replace('\n', os.linesep)) - if line == '\n' or line == '': - if first_pass: - first_pass = False - self._file.write('*** EOOH ***' + os.linesep) - message.seek(original_pos) - else: - break - while True: - buffer = message.read(4096) # Buffer size is arbitrary. - if buffer == '': - break - self._file.write(buffer.replace('\n', os.linesep)) - else: - raise TypeError('Invalid message type: %s' % type(message)) - stop = self._file.tell() - return (start, stop) - - -class Message(email.message.Message): - """Message with mailbox-format-specific properties.""" - - def __init__(self, message=None): - """Initialize a Message instance.""" - if isinstance(message, email.message.Message): - self._become_message(copy.deepcopy(message)) - if isinstance(message, Message): - message._explain_to(self) - elif isinstance(message, str): - self._become_message(email.message_from_string(message)) - elif hasattr(message, "read"): - self._become_message(email.message_from_file(message)) - elif message is None: - email.message.Message.__init__(self) - else: - raise TypeError('Invalid message type: %s' % type(message)) - - def _become_message(self, message): - """Assume the non-format-specific state of message.""" - for name in ('_headers', '_unixfrom', '_payload', '_charset', - 
'preamble', 'epilogue', 'defects', '_default_type'): - self.__dict__[name] = message.__dict__[name] - - def _explain_to(self, message): - """Copy format-specific state to message insofar as possible.""" - if isinstance(message, Message): - return # There's nothing format-specific to explain. - else: - raise TypeError('Cannot convert to specified type') - - -class MaildirMessage(Message): - """Message with Maildir-specific properties.""" - - def __init__(self, message=None): - """Initialize a MaildirMessage instance.""" - self._subdir = 'new' - self._info = '' - self._date = time.time() - Message.__init__(self, message) - - def get_subdir(self): - """Return 'new' or 'cur'.""" - return self._subdir - - def set_subdir(self, subdir): - """Set subdir to 'new' or 'cur'.""" - if subdir == 'new' or subdir == 'cur': - self._subdir = subdir - else: - raise ValueError("subdir must be 'new' or 'cur': %s" % subdir) - - def get_flags(self): - """Return as a string the flags that are set.""" - if self._info.startswith('2,'): - return self._info[2:] - else: - return '' - - def set_flags(self, flags): - """Set the given flags and unset all others.""" - self._info = '2,' + ''.join(sorted(flags)) - - def add_flag(self, flag): - """Set the given flag(s) without changing others.""" - self.set_flags(''.join(set(self.get_flags()) | set(flag))) - - def remove_flag(self, flag): - """Unset the given string flag(s) without changing others.""" - if self.get_flags() != '': - self.set_flags(''.join(set(self.get_flags()) - set(flag))) - - def get_date(self): - """Return delivery date of message, in seconds since the epoch.""" - return self._date - - def set_date(self, date): - """Set delivery date of message, in seconds since the epoch.""" - try: - self._date = float(date) - except ValueError: - raise TypeError("can't convert to float: %s" % date) - - def get_info(self): - """Get the message's "info" as a string.""" - return self._info - - def set_info(self, info): - """Set the message's "info" 
string.""" - if isinstance(info, str): - self._info = info - else: - raise TypeError('info must be a string: %s' % type(info)) - - def _explain_to(self, message): - """Copy Maildir-specific state to message insofar as possible.""" - if isinstance(message, MaildirMessage): - message.set_flags(self.get_flags()) - message.set_subdir(self.get_subdir()) - message.set_date(self.get_date()) - elif isinstance(message, _mboxMMDFMessage): - flags = set(self.get_flags()) - if 'S' in flags: - message.add_flag('R') - if self.get_subdir() == 'cur': - message.add_flag('O') - if 'T' in flags: - message.add_flag('D') - if 'F' in flags: - message.add_flag('F') - if 'R' in flags: - message.add_flag('A') - message.set_from('MAILER-DAEMON', time.gmtime(self.get_date())) - elif isinstance(message, MHMessage): - flags = set(self.get_flags()) - if 'S' not in flags: - message.add_sequence('unseen') - if 'R' in flags: - message.add_sequence('replied') - if 'F' in flags: - message.add_sequence('flagged') - elif isinstance(message, BabylMessage): - flags = set(self.get_flags()) - if 'S' not in flags: - message.add_label('unseen') - if 'T' in flags: - message.add_label('deleted') - if 'R' in flags: - message.add_label('answered') - if 'P' in flags: - message.add_label('forwarded') - elif isinstance(message, Message): - pass - else: - raise TypeError('Cannot convert to specified type: %s' % - type(message)) - - -class _mboxMMDFMessage(Message): - """Message with mbox- or MMDF-specific properties.""" - - def __init__(self, message=None): - """Initialize an mboxMMDFMessage instance.""" - self.set_from('MAILER-DAEMON', True) - if isinstance(message, email.message.Message): - unixfrom = message.get_unixfrom() - if unixfrom is not None and unixfrom.startswith('From '): - self.set_from(unixfrom[5:]) - Message.__init__(self, message) - - def get_from(self): - """Return contents of "From " line.""" - return self._from - - def set_from(self, from_, time_=None): - """Set "From " line, formatting and 
appending time_ if specified.""" - if time_ is not None: - if time_ is True: - time_ = time.gmtime() - from_ += ' ' + time.asctime(time_) - self._from = from_ - - def get_flags(self): - """Return as a string the flags that are set.""" - return self.get('Status', '') + self.get('X-Status', '') - - def set_flags(self, flags): - """Set the given flags and unset all others.""" - flags = set(flags) - status_flags, xstatus_flags = '', '' - for flag in ('R', 'O'): - if flag in flags: - status_flags += flag - flags.remove(flag) - for flag in ('D', 'F', 'A'): - if flag in flags: - xstatus_flags += flag - flags.remove(flag) - xstatus_flags += ''.join(sorted(flags)) - try: - self.replace_header('Status', status_flags) - except KeyError: - self.add_header('Status', status_flags) - try: - self.replace_header('X-Status', xstatus_flags) - except KeyError: - self.add_header('X-Status', xstatus_flags) - - def add_flag(self, flag): - """Set the given flag(s) without changing others.""" - self.set_flags(''.join(set(self.get_flags()) | set(flag))) - - def remove_flag(self, flag): - """Unset the given string flag(s) without changing others.""" - if 'Status' in self or 'X-Status' in self: - self.set_flags(''.join(set(self.get_flags()) - set(flag))) - - def _explain_to(self, message): - """Copy mbox- or MMDF-specific state to message insofar as possible.""" - if isinstance(message, MaildirMessage): - flags = set(self.get_flags()) - if 'O' in flags: - message.set_subdir('cur') - if 'F' in flags: - message.add_flag('F') - if 'A' in flags: - message.add_flag('R') - if 'R' in flags: - message.add_flag('S') - if 'D' in flags: - message.add_flag('T') - del message['status'] - del message['x-status'] - maybe_date = ' '.join(self.get_from().split()[-5:]) - try: - message.set_date(calendar.timegm(time.strptime(maybe_date, - '%a %b %d %H:%M:%S %Y'))) - except (ValueError, OverflowError): - pass - elif isinstance(message, _mboxMMDFMessage): - message.set_flags(self.get_flags()) - 
message.set_from(self.get_from()) - elif isinstance(message, MHMessage): - flags = set(self.get_flags()) - if 'R' not in flags: - message.add_sequence('unseen') - if 'A' in flags: - message.add_sequence('replied') - if 'F' in flags: - message.add_sequence('flagged') - del message['status'] - del message['x-status'] - elif isinstance(message, BabylMessage): - flags = set(self.get_flags()) - if 'R' not in flags: - message.add_label('unseen') - if 'D' in flags: - message.add_label('deleted') - if 'A' in flags: - message.add_label('answered') - del message['status'] - del message['x-status'] - elif isinstance(message, Message): - pass - else: - raise TypeError('Cannot convert to specified type: %s' % - type(message)) - - -class mboxMessage(_mboxMMDFMessage): - """Message with mbox-specific properties.""" - - -class MHMessage(Message): - """Message with MH-specific properties.""" - - def __init__(self, message=None): - """Initialize an MHMessage instance.""" - self._sequences = [] - Message.__init__(self, message) - - def get_sequences(self): - """Return a list of sequences that include the message.""" - return self._sequences[:] - - def set_sequences(self, sequences): - """Set the list of sequences that include the message.""" - self._sequences = list(sequences) - - def add_sequence(self, sequence): - """Add sequence to list of sequences including the message.""" - if isinstance(sequence, str): - if not sequence in self._sequences: - self._sequences.append(sequence) - else: - raise TypeError('sequence must be a string: %s' % type(sequence)) - - def remove_sequence(self, sequence): - """Remove sequence from the list of sequences including the message.""" - try: - self._sequences.remove(sequence) - except ValueError: - pass - - def _explain_to(self, message): - """Copy MH-specific state to message insofar as possible.""" - if isinstance(message, MaildirMessage): - sequences = set(self.get_sequences()) - if 'unseen' in sequences: - message.set_subdir('cur') - else: - 
message.set_subdir('cur') - message.add_flag('S') - if 'flagged' in sequences: - message.add_flag('F') - if 'replied' in sequences: - message.add_flag('R') - elif isinstance(message, _mboxMMDFMessage): - sequences = set(self.get_sequences()) - if 'unseen' not in sequences: - message.add_flag('RO') - else: - message.add_flag('O') - if 'flagged' in sequences: - message.add_flag('F') - if 'replied' in sequences: - message.add_flag('A') - elif isinstance(message, MHMessage): - for sequence in self.get_sequences(): - message.add_sequence(sequence) - elif isinstance(message, BabylMessage): - sequences = set(self.get_sequences()) - if 'unseen' in sequences: - message.add_label('unseen') - if 'replied' in sequences: - message.add_label('answered') - elif isinstance(message, Message): - pass - else: - raise TypeError('Cannot convert to specified type: %s' % - type(message)) - - -class BabylMessage(Message): - """Message with Babyl-specific properties.""" - - def __init__(self, message=None): - """Initialize a BabylMessage instance.""" - self._labels = [] - self._visible = Message() - Message.__init__(self, message) - - def get_labels(self): - """Return a list of labels on the message.""" - return self._labels[:] - - def set_labels(self, labels): - """Set the list of labels on the message.""" - self._labels = list(labels) - - def add_label(self, label): - """Add label to list of labels on the message.""" - if isinstance(label, str): - if label not in self._labels: - self._labels.append(label) - else: - raise TypeError('label must be a string: %s' % type(label)) - - def remove_label(self, label): - """Remove label from the list of labels on the message.""" - try: - self._labels.remove(label) - except ValueError: - pass - - def get_visible(self): - """Return a Message representation of visible headers.""" - return Message(self._visible) - - def set_visible(self, visible): - """Set the Message representation of visible headers.""" - self._visible = Message(visible) - - def 
update_visible(self): - """Update and/or sensibly generate a set of visible headers.""" - for header in self._visible.keys(): - if header in self: - self._visible.replace_header(header, self[header]) - else: - del self._visible[header] - for header in ('Date', 'From', 'Reply-To', 'To', 'CC', 'Subject'): - if header in self and header not in self._visible: - self._visible[header] = self[header] - - def _explain_to(self, message): - """Copy Babyl-specific state to message insofar as possible.""" - if isinstance(message, MaildirMessage): - labels = set(self.get_labels()) - if 'unseen' in labels: - message.set_subdir('cur') - else: - message.set_subdir('cur') - message.add_flag('S') - if 'forwarded' in labels or 'resent' in labels: - message.add_flag('P') - if 'answered' in labels: - message.add_flag('R') - if 'deleted' in labels: - message.add_flag('T') - elif isinstance(message, _mboxMMDFMessage): - labels = set(self.get_labels()) - if 'unseen' not in labels: - message.add_flag('RO') - else: - message.add_flag('O') - if 'deleted' in labels: - message.add_flag('D') - if 'answered' in labels: - message.add_flag('A') - elif isinstance(message, MHMessage): - labels = set(self.get_labels()) - if 'unseen' in labels: - message.add_sequence('unseen') - if 'answered' in labels: - message.add_sequence('replied') - elif isinstance(message, BabylMessage): - message.set_visible(self.get_visible()) - for label in self.get_labels(): - message.add_label(label) - elif isinstance(message, Message): - pass - else: - raise TypeError('Cannot convert to specified type: %s' % - type(message)) - - -class MMDFMessage(_mboxMMDFMessage): - """Message with MMDF-specific properties.""" - - -class _ProxyFile: - """A read-only wrapper of a file.""" - - def __init__(self, f, pos=None): - """Initialize a _ProxyFile.""" - self._file = f - if pos is None: - self._pos = f.tell() - else: - self._pos = pos - - def read(self, size=None): - """Read bytes.""" - return self._read(size, self._file.read) - - 
def readline(self, size=None): - """Read a line.""" - return self._read(size, self._file.readline) - - def readlines(self, sizehint=None): - """Read multiple lines.""" - result = [] - for line in self: - result.append(line) - if sizehint is not None: - sizehint -= len(line) - if sizehint <= 0: - break - return result - - def __iter__(self): - """Iterate over lines.""" - return iter(self.readline, "") - - def tell(self): - """Return the position.""" - return self._pos - - def seek(self, offset, whence=0): - """Change position.""" - if whence == 1: - self._file.seek(self._pos) - self._file.seek(offset, whence) - self._pos = self._file.tell() - - def close(self): - """Close the file.""" - if hasattr(self, '_file'): - if hasattr(self._file, 'close'): - self._file.close() - del self._file - - def _read(self, size, read_method): - """Read size bytes using read_method.""" - if size is None: - size = -1 - self._file.seek(self._pos) - result = read_method(size) - self._pos = self._file.tell() - return result - - -class _PartialFile(_ProxyFile): - """A read-only wrapper of part of a file.""" - - def __init__(self, f, start=None, stop=None): - """Initialize a _PartialFile.""" - _ProxyFile.__init__(self, f, start) - self._start = start - self._stop = stop - - def tell(self): - """Return the position with respect to start.""" - return _ProxyFile.tell(self) - self._start - - def seek(self, offset, whence=0): - """Change position, possibly with respect to start or stop.""" - if whence == 0: - self._pos = self._start - whence = 1 - elif whence == 2: - self._pos = self._stop - whence = 1 - _ProxyFile.seek(self, offset, whence) - - def _read(self, size, read_method): - """Read size bytes using read_method, honoring start and stop.""" - remaining = self._stop - self._pos - if remaining <= 0: - return '' - if size is None or size < 0 or size > remaining: - size = remaining - return _ProxyFile._read(self, size, read_method) - - def close(self): - # do *not* close the underlying file 
object for partial files, - # since it's global to the mailbox object - if hasattr(self, '_file'): - del self._file - - -def _lock_file(f, dotlock=True): - """Lock file f using lockf and dot locking.""" - dotlock_done = False - try: - if fcntl: - try: - fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB) - except IOError, e: - if e.errno in (errno.EAGAIN, errno.EACCES, errno.EROFS): - raise ExternalClashError('lockf: lock unavailable: %s' % - f.name) - else: - raise - if dotlock: - try: - pre_lock = _create_temporary(f.name + '.lock') - pre_lock.close() - except IOError, e: - if e.errno in (errno.EACCES, errno.EROFS): - return # Without write access, just skip dotlocking. - else: - raise - try: - if hasattr(os, 'link'): - os.link(pre_lock.name, f.name + '.lock') - dotlock_done = True - os.unlink(pre_lock.name) - else: - os.rename(pre_lock.name, f.name + '.lock') - dotlock_done = True - except OSError, e: - if e.errno == errno.EEXIST or \ - (os.name == 'os2' and e.errno == errno.EACCES): - os.remove(pre_lock.name) - raise ExternalClashError('dot lock unavailable: %s' % - f.name) - else: - raise - except: - if fcntl: - fcntl.lockf(f, fcntl.LOCK_UN) - if dotlock_done: - os.remove(f.name + '.lock') - raise - -def _unlock_file(f): - """Unlock file f using lockf and dot locking.""" - if fcntl: - fcntl.lockf(f, fcntl.LOCK_UN) - if os.path.exists(f.name + '.lock'): - os.remove(f.name + '.lock') - -def _create_carefully(path): - """Create a file if it doesn't exist and open for reading and writing.""" - fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0666) - try: - return open(path, 'rb+') - finally: - os.close(fd) - -def _create_temporary(path): - """Create a temp file based on path and open for reading and writing.""" - return _create_carefully('%s.%s.%s.%s' % (path, int(time.time()), - socket.gethostname(), - os.getpid())) - -def _sync_flush(f): - """Ensure changes to file f are physically on disk.""" - f.flush() - if hasattr(os, 'fsync'): - os.fsync(f.fileno()) - -def 
_sync_close(f): - """Close file f, ensuring all changes are physically on disk.""" - _sync_flush(f) - f.close() - -## Start: classes from the original module (for backward compatibility). - -# Note that the Maildir class, whose name is unchanged, itself offers a next() -# method for backward compatibility. - -class _Mailbox: - - def __init__(self, fp, factory=rfc822.Message): - self.fp = fp - self.seekp = 0 - self.factory = factory - - def __iter__(self): - return iter(self.next, None) - - def next(self): - while 1: - self.fp.seek(self.seekp) - try: - self._search_start() - except EOFError: - self.seekp = self.fp.tell() - return None - start = self.fp.tell() - self._search_end() - self.seekp = stop = self.fp.tell() - if start != stop: - break - return self.factory(_PartialFile(self.fp, start, stop)) - -# Recommended to use PortableUnixMailbox instead! -class UnixMailbox(_Mailbox): - - def _search_start(self): - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - raise EOFError - if line[:5] == 'From ' and self._isrealfromline(line): - self.fp.seek(pos) - return - - def _search_end(self): - self.fp.readline() # Throw away header line - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - return - if line[:5] == 'From ' and self._isrealfromline(line): - self.fp.seek(pos) - return - - # An overridable mechanism to test for From-line-ness. You can either - # specify a different regular expression or define a whole new - # _isrealfromline() method. Note that this only gets called for lines - # starting with the 5 characters "From ". - # - # BAW: According to - #http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html - # the only portable, reliable way to find message delimiters in a BSD (i.e - # Unix mailbox) style folder is to search for "\n\nFrom .*\n", or at the - # beginning of the file, "^From .*\n". 
While _fromlinepattern below seems - # like a good idea, in practice, there are too many variations for more - # strict parsing of the line to be completely accurate. - # - # _strict_isrealfromline() is the old version which tries to do stricter - # parsing of the From_ line. _portable_isrealfromline() simply returns - # true, since it's never called if the line doesn't already start with - # "From ". - # - # This algorithm, and the way it interacts with _search_start() and - # _search_end() may not be completely correct, because it doesn't check - # that the two characters preceding "From " are \n\n or the beginning of - # the file. Fixing this would require a more extensive rewrite than is - # necessary. For convenience, we've added a PortableUnixMailbox class - # which does no checking of the format of the 'From' line. - - _fromlinepattern = (r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" - r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*" - r"[^\s]*\s*" - "$") - _regexp = None - - def _strict_isrealfromline(self, line): - if not self._regexp: - import re - self._regexp = re.compile(self._fromlinepattern) - return self._regexp.match(line) - - def _portable_isrealfromline(self, line): - return True - - _isrealfromline = _strict_isrealfromline - - -class PortableUnixMailbox(UnixMailbox): - _isrealfromline = UnixMailbox._portable_isrealfromline - - -class MmdfMailbox(_Mailbox): - - def _search_start(self): - while 1: - line = self.fp.readline() - if not line: - raise EOFError - if line[:5] == '\001\001\001\001\n': - return - - def _search_end(self): - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - return - if line == '\001\001\001\001\n': - self.fp.seek(pos) - return - - -class MHMailbox: - - def __init__(self, dirname, factory=rfc822.Message): - import re - pat = re.compile('^[1-9][0-9]*$') - self.dirname = dirname - # the three following lines could be combined into: - # list = map(long, filter(pat.match, os.listdir(self.dirname))) - 
list = os.listdir(self.dirname) - list = filter(pat.match, list) - list = map(long, list) - list.sort() - # This only works in Python 1.6 or later; - # before that str() added 'L': - self.boxes = map(str, list) - self.boxes.reverse() - self.factory = factory - - def __iter__(self): - return iter(self.next, None) - - def next(self): - if not self.boxes: - return None - fn = self.boxes.pop() - fp = open(os.path.join(self.dirname, fn)) - msg = self.factory(fp) - try: - msg._mh_msgno = fn - except (AttributeError, TypeError): - pass - return msg - - -class BabylMailbox(_Mailbox): - - def _search_start(self): - while 1: - line = self.fp.readline() - if not line: - raise EOFError - if line == '*** EOOH ***\n': - return - - def _search_end(self): - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - return - if line == '\037\014\n' or line == '\037': - self.fp.seek(pos) - return - -## End: classes from the original module (for backward compatibility). - - -class Error(Exception): - """Raised for module-specific errors.""" - -class NoSuchMailboxError(Error): - """The specified mailbox does not exist and won't be created.""" - -class NotEmptyError(Error): - """The specified mailbox is not empty and deletion was requested.""" - -class ExternalClashError(Error): - """Another process caused an action to fail.""" - -class FormatError(Error): - """A file appears to have an invalid format.""" diff --git a/python/Lib/mailcap.py b/python/Lib/mailcap.py deleted file mode 100755 index 04077ba0db..0000000000 --- a/python/Lib/mailcap.py +++ /dev/null @@ -1,255 +0,0 @@ -"""Mailcap file handling. See RFC 1524.""" - -import os - -__all__ = ["getcaps","findmatch"] - -# Part 1: top-level interface. - -def getcaps(): - """Return a dictionary containing the mailcap database. - - The dictionary maps a MIME type (in all lowercase, e.g. 'text/plain') - to a list of dictionaries corresponding to mailcap entries. 
The list - collects all the entries for that MIME type from all available mailcap - files. Each dictionary contains key-value pairs for that MIME type, - where the viewing command is stored with the key "view". - - """ - caps = {} - for mailcap in listmailcapfiles(): - try: - fp = open(mailcap, 'r') - except IOError: - continue - with fp: - morecaps = readmailcapfile(fp) - for key, value in morecaps.iteritems(): - if not key in caps: - caps[key] = value - else: - caps[key] = caps[key] + value - return caps - -def listmailcapfiles(): - """Return a list of all mailcap files found on the system.""" - # XXX Actually, this is Unix-specific - if 'MAILCAPS' in os.environ: - str = os.environ['MAILCAPS'] - mailcaps = str.split(':') - else: - if 'HOME' in os.environ: - home = os.environ['HOME'] - else: - # Don't bother with getpwuid() - home = '.' # Last resort - mailcaps = [home + '/.mailcap', '/etc/mailcap', - '/usr/etc/mailcap', '/usr/local/etc/mailcap'] - return mailcaps - - -# Part 2: the parser. - -def readmailcapfile(fp): - """Read a mailcap file and return a dictionary keyed by MIME type. - - Each MIME type is mapped to an entry consisting of a list of - dictionaries; the list will contain more than one such dictionary - if a given MIME type appears more than once in the mailcap file. - Each dictionary contains key-value pairs for that MIME type, where - the viewing command is stored with the key "view". 
- """ - caps = {} - while 1: - line = fp.readline() - if not line: break - # Ignore comments and blank lines - if line[0] == '#' or line.strip() == '': - continue - nextline = line - # Join continuation lines - while nextline[-2:] == '\\\n': - nextline = fp.readline() - if not nextline: nextline = '\n' - line = line[:-2] + nextline - # Parse the line - key, fields = parseline(line) - if not (key and fields): - continue - # Normalize the key - types = key.split('/') - for j in range(len(types)): - types[j] = types[j].strip() - key = '/'.join(types).lower() - # Update the database - if key in caps: - caps[key].append(fields) - else: - caps[key] = [fields] - return caps - -def parseline(line): - """Parse one entry in a mailcap file and return a dictionary. - - The viewing command is stored as the value with the key "view", - and the rest of the fields produce key-value pairs in the dict. - """ - fields = [] - i, n = 0, len(line) - while i < n: - field, i = parsefield(line, i, n) - fields.append(field) - i = i+1 # Skip semicolon - if len(fields) < 2: - return None, None - key, view, rest = fields[0], fields[1], fields[2:] - fields = {'view': view} - for field in rest: - i = field.find('=') - if i < 0: - fkey = field - fvalue = "" - else: - fkey = field[:i].strip() - fvalue = field[i+1:].strip() - if fkey in fields: - # Ignore it - pass - else: - fields[fkey] = fvalue - return key, fields - -def parsefield(line, i, n): - """Separate one key-value pair in a mailcap entry.""" - start = i - while i < n: - c = line[i] - if c == ';': - break - elif c == '\\': - i = i+2 - else: - i = i+1 - return line[start:i].strip(), i - - -# Part 3: using the database. - -def findmatch(caps, MIMEtype, key='view', filename="/dev/null", plist=[]): - """Find a match for a mailcap entry. - - Return a tuple containing the command line, and the mailcap entry - used; (None, None) if no match is found. 
This may invoke the - 'test' command of several matching entries before deciding which - entry to use. - - """ - entries = lookup(caps, MIMEtype, key) - # XXX This code should somehow check for the needsterminal flag. - for e in entries: - if 'test' in e: - test = subst(e['test'], filename, plist) - if test and os.system(test) != 0: - continue - command = subst(e[key], MIMEtype, filename, plist) - return command, e - return None, None - -def lookup(caps, MIMEtype, key=None): - entries = [] - if MIMEtype in caps: - entries = entries + caps[MIMEtype] - MIMEtypes = MIMEtype.split('/') - MIMEtype = MIMEtypes[0] + '/*' - if MIMEtype in caps: - entries = entries + caps[MIMEtype] - if key is not None: - entries = filter(lambda e, key=key: key in e, entries) - return entries - -def subst(field, MIMEtype, filename, plist=[]): - # XXX Actually, this is Unix-specific - res = '' - i, n = 0, len(field) - while i < n: - c = field[i]; i = i+1 - if c != '%': - if c == '\\': - c = field[i:i+1]; i = i+1 - res = res + c - else: - c = field[i]; i = i+1 - if c == '%': - res = res + c - elif c == 's': - res = res + filename - elif c == 't': - res = res + MIMEtype - elif c == '{': - start = i - while i < n and field[i] != '}': - i = i+1 - name = field[start:i] - i = i+1 - res = res + findparam(name, plist) - # XXX To do: - # %n == number of parts if type is multipart/* - # %F == list of alternating type and filename for parts - else: - res = res + '%' + c - return res - -def findparam(name, plist): - name = name.lower() + '=' - n = len(name) - for p in plist: - if p[:n].lower() == name: - return p[n:] - return '' - - -# Part 4: test program. - -def test(): - import sys - caps = getcaps() - if not sys.argv[1:]: - show(caps) - return - for i in range(1, len(sys.argv), 2): - args = sys.argv[i:i+2] - if len(args) < 2: - print "usage: mailcap [MIMEtype file] ..." 
- return - MIMEtype = args[0] - file = args[1] - command, e = findmatch(caps, MIMEtype, 'view', file) - if not command: - print "No viewer found for", type - else: - print "Executing:", command - sts = os.system(command) - if sts: - print "Exit status:", sts - -def show(caps): - print "Mailcap files:" - for fn in listmailcapfiles(): print "\t" + fn - print - if not caps: caps = getcaps() - print "Mailcap entries:" - print - ckeys = caps.keys() - ckeys.sort() - for type in ckeys: - print type - entries = caps[type] - for e in entries: - keys = e.keys() - keys.sort() - for k in keys: - print " %-15s" % k, e[k] - print - -if __name__ == '__main__': - test() diff --git a/python/Lib/markupbase.py b/python/Lib/markupbase.py deleted file mode 100755 index ddeb9835b8..0000000000 --- a/python/Lib/markupbase.py +++ /dev/null @@ -1,396 +0,0 @@ -"""Shared support for scanning document type declarations in HTML and XHTML. - -This module is used as a foundation for the HTMLParser and sgmllib -modules (indirectly, for htmllib as well). It has no documented -public API and should not be used directly. 
- -""" - -import re - -_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match -_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match -_commentclose = re.compile(r'--\s*>') -_markedsectionclose = re.compile(r']\s*]\s*>') - -# An analysis of the MS-Word extensions is available at -# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf - -_msmarkedsectionclose = re.compile(r']\s*>') - -del re - - -class ParserBase: - """Parser base class which provides some common support methods used - by the SGML/HTML and XHTML parsers.""" - - def __init__(self): - if self.__class__ is ParserBase: - raise RuntimeError( - "markupbase.ParserBase must be subclassed") - - def error(self, message): - raise NotImplementedError( - "subclasses of ParserBase must override error()") - - def reset(self): - self.lineno = 1 - self.offset = 0 - - def getpos(self): - """Return current line number and offset.""" - return self.lineno, self.offset - - # Internal -- update line number and offset. This should be - # called for each piece of data exactly once, in order -- in other - # words the concatenation of all the input strings to this - # function should be exactly the entire input. - def updatepos(self, i, j): - if i >= j: - return j - rawdata = self.rawdata - nlines = rawdata.count("\n", i, j) - if nlines: - self.lineno = self.lineno + nlines - pos = rawdata.rindex("\n", i, j) # Should not fail - self.offset = j-(pos+1) - else: - self.offset = self.offset + j-i - return j - - _decl_otherchars = '' - - # Internal -- parse declaration (for use by subclasses). - def parse_declaration(self, i): - # This is some sort of declaration; in "HTML as - # deployed," this should only be the document type - # declaration (""). 
- # ISO 8879:1986, however, has more complex - # declaration syntax for elements in , including: - # --comment-- - # [marked section] - # name in the following list: ENTITY, DOCTYPE, ELEMENT, - # ATTLIST, NOTATION, SHORTREF, USEMAP, - # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM - rawdata = self.rawdata - j = i + 2 - assert rawdata[i:j] == "": - # the empty comment - return j + 1 - if rawdata[j:j+1] in ("-", ""): - # Start of comment followed by buffer boundary, - # or just a buffer boundary. - return -1 - # A simple, practical version could look like: ((name|stringlit) S*) + '>' - n = len(rawdata) - if rawdata[j:j+2] == '--': #comment - # Locate --.*-- as the body of the comment - return self.parse_comment(i) - elif rawdata[j] == '[': #marked section - # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section - # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA - # Note that this is extended by Microsoft Office "Save as Web" function - # to include [if...] and [endif]. - return self.parse_marked_section(i) - else: #all other declaration elements - decltype, j = self._scan_name(j, i) - if j < 0: - return j - if decltype == "doctype": - self._decl_otherchars = '' - while j < n: - c = rawdata[j] - if c == ">": - # end of declaration syntax - data = rawdata[i+2:j] - if decltype == "doctype": - self.handle_decl(data) - else: - # According to the HTML5 specs sections "8.2.4.44 Bogus - # comment state" and "8.2.4.45 Markup declaration open - # state", a comment token should be emitted. - # Calling unknown_decl provides more flexibility though. 
- self.unknown_decl(data) - return j + 1 - if c in "\"'": - m = _declstringlit_match(rawdata, j) - if not m: - return -1 # incomplete - j = m.end() - elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": - name, j = self._scan_name(j, i) - elif c in self._decl_otherchars: - j = j + 1 - elif c == "[": - # this could be handled in a separate doctype parser - if decltype == "doctype": - j = self._parse_doctype_subset(j + 1, i) - elif decltype in ("attlist", "linktype", "link", "element"): - # must tolerate []'d groups in a content model in an element declaration - # also in data attribute specifications of attlist declaration - # also link type declaration subsets in linktype declarations - # also link attribute specification lists in link declarations - self.error("unsupported '[' char in %s declaration" % decltype) - else: - self.error("unexpected '[' char in declaration") - else: - self.error( - "unexpected %r char in declaration" % rawdata[j]) - if j < 0: - return j - return -1 # incomplete - - # Internal -- parse a marked section - # Override this to handle MS-word extension syntax content - def parse_marked_section(self, i, report=1): - rawdata= self.rawdata - assert rawdata[i:i+3] == ' ending - match= _markedsectionclose.search(rawdata, i+3) - elif sectName in ("if", "else", "endif"): - # look for MS Office ]> ending - match= _msmarkedsectionclose.search(rawdata, i+3) - else: - self.error('unknown status keyword %r in marked section' % rawdata[i+3:j]) - if not match: - return -1 - if report: - j = match.start(0) - self.unknown_decl(rawdata[i+3: j]) - return match.end(0) - - # Internal -- parse comment, return length or -1 if not terminated - def parse_comment(self, i, report=1): - rawdata = self.rawdata - if rawdata[i:i+4] != ' Computer Name - PhysicalDisk --> Object Name - _Total --> The particular Instance (in this case, all instances, i.e. all drives) - Avg. Disk Bytes/Read --> The piece of data being monitored. 
- -EXAMPLE: Collecting Data with a Query - As an example, the following code implements a logger which allows the - user to choose what counters they would like to log, and logs those - counters for 30 seconds, at two-second intervals. - - query = Query() - query.addcounterbybrowsing() - query.collectdatafor(30,2) - - The data is now stored in a list of lists as: - query.curresults - - The counters(paths) which were used to collect the data are: - query.curpaths - - You can use the win32pdh.ParseCounterPath(path) utility function - to turn the paths into more easily read values for your task, or - write the data to a file, or do whatever you want with it. - -OTHER NOTABLE METHODS: - query.collectdatawhile(period) # start a logging thread for collecting data - query.collectdatawhile_stop() # signal the logging thread to stop logging - query.collectdata() # run the query only once - query.addperfcounter(object, counter, machine=None) # add a standard performance counter - query.addinstcounter(object, counter,machine=None,objtype = 'Process',volatile=1,format = win32pdh.PDH_FMT_LONG) # add a possibly volatile counter - -### Known bugs and limitations ### -Due to a problem with threading under the PythonWin interpreter, there -will be no data logged if the PythonWin window is not the foreground -application. Workaround: scripts using threading should be run in the -python.exe interpreter. - -The volatile-counter handlers are possibly buggy, they haven't been -tested to any extent. The wrapper Query makes it safe to pass invalid -paths (a -1 will be returned, or the Query will be totally ignored, -depending on the missing element), so you should be able to work around -the error by including all possible paths and filtering out the -1's. - -There is no way I know of to stop a thread which is currently sleeping, -so you have to wait until the thread in collectdatawhile is activated -again. 
This might become a problem in situations where the collection -period is multiple minutes (or hours, or whatever). - -Should make the win32pdh.ParseCounter function available to the Query -classes as a method or something similar, so that it can be accessed -by programmes that have just picked up an instance from somewhere. - -Should explicitly mention where QueryErrors can be raised, and create a -full test set to see if there are any uncaught win32api.error's still -hanging around. - -When using the python.exe interpreter, the addcounterbybrowsing- -generated browser window is often hidden behind other windows. No known -workaround other than Alt-tabing to reach the browser window. - -### Other References ### -The win32pdhutil module (which should be in the %pythonroot%/win32/lib -directory) provides quick-and-dirty utilities for one-off access to -variables from the PDH. Almost everything in that module can be done -with a Query object, but it provides task-oriented functions for a -number of common one-off tasks. - -If you can access the MS Developers Network Library, you can find -information about the PDH API as MS describes it. For a background article, -try: -http://msdn.microsoft.com/library/en-us/dnperfmo/html/msdn_pdhlib.asp - -The reference guide for the PDH API was last spotted at: -http://msdn.microsoft.com/library/en-us/perfmon/base/using_the_pdh_interface.asp - - -In general the Python version of the API is just a wrapper around the -Query-based version of this API (as far as I can see), so you can learn what -you need to from there. From what I understand, the MSDN Online -resources are available for the price of signing up for them. I can't -guarantee how long that's supposed to last. (Or anything for that -matter). 
-http://premium.microsoft.com/isapi/devonly/prodinfo/msdnprod/msdnlib.idc?theURL=/msdn/library/sdkdoc/perfdata_4982.htm - -The eventual plan is for my (Mike Fletcher's) Starship account to include -a section on NT Administration, and the Query is the first project -in this plan. There should be an article describing the creation of -a simple logger there, but the example above is 90% of the work of -that project, so don't sweat it if you don't find anything there. -(currently the account hasn't been set up). -http://starship.skyport.net/crew/mcfletch/ - -If you need to contact me immediately, (why I can't imagine), you can -email me at mcfletch@golden.net, or just post your question to the -Python newsgroup with a catchy subject line. -news:comp.lang.python - -### Other Stuff ### -The Query classes are by Mike Fletcher, with the working code -being corruptions of Mark Hammonds win32pdhutil module. - -Use at your own risk, no warranties, no guarantees, no assurances, -if you use it, you accept the risk of using it, etceteras. - -''' -# Feb 12, 98 - MH added "rawaddcounter" so caller can get exception details. - -import win32pdh, win32api,time, thread,copy - -class BaseQuery: - ''' - Provides wrapped access to the Performance Data Helper query - objects, generally you should use the child class Query - unless you have need of doing weird things :) - - This class supports two major working paradigms. In the first, - you open the query, and run it as many times as you need, closing - the query when you're done with it. This is suitable for static - queries (ones where processes being monitored don't disappear). - - In the second, you allow the query to be opened each time and - closed afterward. This causes the base query object to be - destroyed after each call. Suitable for dynamic queries (ones - which watch processes which might be closed while watching.) 
- ''' - def __init__(self,paths=None): - ''' - The PDH Query object is initialised with a single, optional - list argument, that must be properly formatted PDH Counter - paths. Generally this list will only be provided by the class - when it is being unpickled (removed from storage). Normal - use is to call the class with no arguments and use the various - addcounter functions (particularly, for end user's, the use of - addcounterbybrowsing is the most common approach) You might - want to provide the list directly if you want to hard-code the - elements with which your query deals (and thereby avoid the - overhead of unpickling the class). - ''' - self.counters = [] - if paths: - self.paths = paths - else: - self.paths = [] - self._base = None - self.active = 0 - self.curpaths = [] - def addcounterbybrowsing(self, flags = win32pdh.PERF_DETAIL_WIZARD, windowtitle="Python Browser"): - ''' - Adds possibly multiple paths to the paths attribute of the query, - does this by calling the standard counter browsing dialogue. Within - this dialogue, find the counter you want to log, and click: Add, - repeat for every path you want to log, then click on close. The - paths are appended to the non-volatile paths list for this class, - subclasses may create a function which parses the paths and decides - (via heuristics) whether to add the path to the volatile or non-volatile - path list. - e.g.: - query.addcounter() - ''' - win32pdh.BrowseCounters(None,0, self.paths.append, flags, windowtitle) - def rawaddcounter(self,object, counter, instance = None, inum=-1, machine=None): - ''' - Adds a single counter path, without catching any exceptions. - - See addcounter for details. - ''' - path = win32pdh.MakeCounterPath( (machine,object,instance, None, inum,counter) ) - self.paths.append(path) - - def addcounter(self,object, counter, instance = None, inum=-1, machine=None): - ''' - Adds a single counter path to the paths attribute. 
Normally - this will be called by a child class' speciality functions, - rather than being called directly by the user. (Though it isn't - hard to call manually, since almost everything is given a default) - This method is only functional when the query is closed (or hasn't - yet been opened). This is to prevent conflict in multi-threaded - query applications). - e.g.: - query.addcounter('Memory','Available Bytes') - ''' - if not self.active: - try: - self.rawaddcounter(object, counter, instance, inum, machine) - return 0 - except win32api.error: - return -1 - else: - return -1 - - def open(self): - ''' - Build the base query object for this wrapper, - then add all of the counters required for the query. - Raise a QueryError if we can't complete the functions. - If we are already open, then do nothing. - ''' - if not self.active: # to prevent having multiple open queries - # curpaths are made accessible here because of the possibility of volatile paths - # which may be dynamically altered by subclasses. - self.curpaths = copy.copy(self.paths) - try: - base = win32pdh.OpenQuery() - for path in self.paths: - try: - self.counters.append(win32pdh.AddCounter(base, path)) - except win32api.error: # we passed a bad path - self.counters.append(0) - pass - self._base = base - self.active = 1 - return 0 # open succeeded - except: # if we encounter any errors, kill the Query - try: - self.killbase(base) - except NameError: # failed in creating query - pass - self.active = 0 - self.curpaths = [] - raise QueryError(self) - return 1 # already open - - def killbase(self,base=None): - ''' - ### This is not a public method - Mission critical function to kill the win32pdh objects held - by this object. User's should generally use the close method - instead of this method, in case a sub-class has overridden - close to provide some special functionality. 
- ''' - # Kill Pythonic references to the objects in this object's namespace - self._base = None - counters = self.counters - self.counters = [] - # we don't kill the curpaths for convenience, this allows the - # user to close a query and still access the last paths - self.active = 0 - # Now call the delete functions on all of the objects - try: - map(win32pdh.RemoveCounter,counters) - except: - pass - try: - win32pdh.CloseQuery(base) - except: - pass - del(counters) - del(base) - def close(self): - ''' - Makes certain that the underlying query object has been closed, - and that all counters have been removed from it. This is - important for reference counting. - You should only need to call close if you have previously called - open. The collectdata methods all can handle opening and - closing the query. Calling close multiple times is acceptable. - ''' - try: - self.killbase(self._base) - except AttributeError: - self.killbase() - __del__ = close - def collectdata(self,format = win32pdh.PDH_FMT_LONG): - ''' - Returns the formatted current values for the Query - ''' - if self._base: # we are currently open, don't change this - return self.collectdataslave(format) - else: # need to open and then close the _base, should be used by one-offs and elements tracking application instances - self.open() # will raise QueryError if couldn't open the query - temp = self.collectdataslave(format) - self.close() # will always close - return temp - def collectdataslave(self,format = win32pdh.PDH_FMT_LONG): - ''' - ### Not a public method - Called only when the Query is known to be open, runs over - the whole set of counters, appending results to the temp, - returns the values as a list. 
- ''' - try: - win32pdh.CollectQueryData(self._base) - temp = [] - for counter in self.counters: - ok = 0 - try: - if counter: - temp.append(win32pdh.GetFormattedCounterValue(counter, format)[1]) - ok = 1 - except win32api.error: - pass - if not ok: - temp.append(-1) # a better way to signal failure??? - return temp - except win32api.error: # will happen if, for instance, no counters are part of the query and we attempt to collect data for it. - return [-1] * len(self.counters) - # pickle functions - def __getinitargs__(self): - ''' - ### Not a public method - ''' - return (self.paths,) - -class Query(BaseQuery): - ''' - Performance Data Helper(PDH) Query object: - - Provides a wrapper around the native PDH query object which - allows for query reuse, query storage, and general maintenance - functions (adding counter paths in various ways being the most - obvious ones). - ''' - def __init__(self,*args,**namedargs): - ''' - The PDH Query object is initialised with a single, optional - list argument, that must be properly formatted PDH Counter - paths. Generally this list will only be provided by the class - when it is being unpickled (removed from storage). Normal - use is to call the class with no arguments and use the various - addcounter functions (particularly, for end user's, the use of - addcounterbybrowsing is the most common approach) You might - want to provide the list directly if you want to hard-code the - elements with which your query deals (and thereby avoid the - overhead of unpickling the class). - ''' - self.volatilecounters = [] - BaseQuery.__init__(*(self,)+args, **namedargs) - def addperfcounter(self, object, counter, machine=None): - ''' - A "Performance Counter" is a stable, known, common counter, - such as Memory, or Processor. The use of addperfcounter by - end-users is deprecated, since the use of - addcounterbybrowsing is considerably more flexible and general. 
- It is provided here to allow the easy development of scripts - which need to access variables so common we know them by name - (such as Memory|Available Bytes), and to provide symmetry with - the add inst counter method. - usage: - query.addperfcounter('Memory', 'Available Bytes') - It is just as easy to access addcounter directly, the following - has an identicle effect. - query.addcounter('Memory', 'Available Bytes') - ''' - BaseQuery.addcounter(self, object=object, counter=counter, machine=machine) - def addinstcounter(self, object, counter,machine=None,objtype = 'Process',volatile=1,format = win32pdh.PDH_FMT_LONG): - ''' - The purpose of using an instcounter is to track particular - instances of a counter object (e.g. a single processor, a single - running copy of a process). For instance, to track all python.exe - instances, you would need merely to ask: - query.addinstcounter('python','Virtual Bytes') - You can find the names of the objects and their available counters - by doing an addcounterbybrowsing() call on a query object (or by - looking in performance monitor's add dialog.) - - Beyond merely rearranging the call arguments to make more sense, - if the volatile flag is true, the instcounters also recalculate - the paths of the available instances on every call to open the - query. - ''' - if volatile: - self.volatilecounters.append((object,counter,machine,objtype,format)) - else: - self.paths[len(self.paths):] = self.getinstpaths(object,counter,machine,objtype,format) - - def getinstpaths(self,object,counter,machine=None,objtype='Process',format = win32pdh.PDH_FMT_LONG): - ''' - ### Not an end-user function - Calculate the paths for an instance object. Should alter - to allow processing for lists of object-counter pairs. - ''' - items, instances = win32pdh.EnumObjectItems(None,None,objtype, -1) - # find out how many instances of this element we have... 
- instances.sort() - try: - cur = instances.index(object) - except ValueError: - return [] # no instances of this object - temp = [object] - try: - while instances[cur+1] == object: - temp.append(object) - cur = cur+1 - except IndexError: # if we went over the end - pass - paths = [] - for ind in range(len(temp)): - # can this raise an error? - paths.append(win32pdh.MakeCounterPath( (machine,'Process',object,None,ind,counter) ) ) - return paths # should also return the number of elements for naming purposes - - def open(self,*args,**namedargs): - ''' - Explicitly open a query: - When you are needing to make multiple calls to the same query, - it is most efficient to open the query, run all of the calls, - then close the query, instead of having the collectdata method - automatically open and close the query each time it runs. - There are currently no arguments to open. - ''' - # do all the normal opening stuff, self._base is now the query object - BaseQuery.open(*(self,)+args, **namedargs) - # should rewrite getinstpaths to take a single tuple - paths = [] - for tup in self.volatilecounters: - paths[len(paths):] = self.getinstpaths(*tup) - for path in paths: - try: - self.counters.append(win32pdh.AddCounter(self._base, path)) - self.curpaths.append(path) # if we fail on the line above, this path won't be in the table or the counters - except win32api.error: - pass # again, what to do with a malformed path??? - def collectdatafor(self, totalperiod, period=1): - ''' - Non-threaded collection of performance data: - This method allows you to specify the total period for which you would - like to run the Query, and the time interval between individual - runs. The collected data is stored in query.curresults at the - _end_ of the run. The pathnames for the query are stored in - query.curpaths. 
- e.g.: - query.collectdatafor(30,2) - Will collect data for 30seconds at 2 second intervals - ''' - tempresults = [] - try: - self.open() - for ind in xrange(totalperiod/period): - tempresults.append(self.collectdata()) - time.sleep(period) - self.curresults = tempresults - finally: - self.close() - def collectdatawhile(self, period=1): - ''' - Threaded collection of performance data: - This method sets up a simple semaphor system for signalling - when you would like to start and stop a threaded data collection - method. The collection runs every period seconds until the - semaphor attribute is set to a non-true value (which normally - should be done by calling query.collectdatawhile_stop() .) - e.g.: - query.collectdatawhile(2) - # starts the query running, returns control to the caller immediately - # is collecting data every two seconds. - # do whatever you want to do while the thread runs, then call: - query.collectdatawhile_stop() - # when you want to deal with the data. It is generally a good idea - # to sleep for period seconds yourself, since the query will not copy - # the required data until the next iteration: - time.sleep(2) - # now you can access the data from the attributes of the query - query.curresults - query.curpaths - ''' - self.collectdatawhile_active = 1 - thread.start_new_thread(self.collectdatawhile_slave,(period,)) - def collectdatawhile_stop(self): - ''' - Signals the collectdatawhile slave thread to stop collecting data - on the next logging iteration. - ''' - self.collectdatawhile_active = 0 - def collectdatawhile_slave(self, period): - ''' - ### Not a public function - Does the threaded work of collecting the data and storing it - in an attribute of the class. - ''' - tempresults = [] - try: - self.open() # also sets active, so can't be changed. 
- while self.collectdatawhile_active: - tempresults.append(self.collectdata()) - time.sleep(period) - self.curresults = tempresults - finally: - self.close() - - # pickle functions - def __getinitargs__(self): - return (self.paths,) - def __getstate__(self): - return self.volatilecounters - def __setstate__(self, volatilecounters): - self.volatilecounters = volatilecounters - - -class QueryError: - def __init__(self, query): - self.query = query - def __repr__(self): - return ''%repr(self.query) - __str__ = __repr__ - diff --git a/python/Lib/site-packages/win32/lib/win32pdhutil.py b/python/Lib/site-packages/win32/lib/win32pdhutil.py deleted file mode 100755 index 350e146b58..0000000000 --- a/python/Lib/site-packages/win32/lib/win32pdhutil.py +++ /dev/null @@ -1,168 +0,0 @@ -"""Utilities for the win32 Performance Data Helper module - -Example: - To get a single bit of data: - >>> import win32pdhutil - >>> win32pdhutil.GetPerformanceAttributes("Memory", "Available Bytes") - 6053888 - >>> win32pdhutil.FindPerformanceAttributesByName("python", counter="Virtual Bytes") - [22278144] - - First example returns data which is not associated with any specific instance. - - The second example reads data for a specific instance - hence the list return - - it would return one result for each instance of Python running. - - In general, it can be tricky finding exactly the "name" of the data you wish to query. - Although you can use (None,None,(eg)"Memory", -1) to do this, - the easiest way is often to simply use PerfMon to find out the names. -""" - -import win32pdh, time - -error = win32pdh.error - -# Handle some localization issues. 
-# see http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support/kb/articles/Q287/1/59.asp&NoWebContent=1 -# Build a map of english_counter_name: counter_id -counter_english_map = {} - -def find_pdh_counter_localized_name(english_name, machine_name = None): - if not counter_english_map: - import win32api, win32con - counter_reg_value = win32api.RegQueryValueEx(win32con.HKEY_PERFORMANCE_DATA, - "Counter 009") - counter_list = counter_reg_value[0] - for i in xrange(0, len(counter_list) - 1, 2): - try: - counter_id = int(counter_list[i]) - except ValueError: - continue - counter_english_map[counter_list[i+1].lower()] = counter_id - return win32pdh.LookupPerfNameByIndex(machine_name, counter_english_map[english_name.lower()]) - -def GetPerformanceAttributes(object, counter, instance = None, inum=-1, - format = win32pdh.PDH_FMT_LONG, machine=None): - # NOTE: Many counters require 2 samples to give accurate results, - # including "% Processor Time" (as by definition, at any instant, a - # thread's CPU usage is either 0 or 100). To read counters like this, - # you should copy this function, but keep the counter open, and call - # CollectQueryData() each time you need to know. 
- # See http://support.microsoft.com/default.aspx?scid=kb;EN-US;q262938 - # and http://msdn.microsoft.com/library/en-us/dnperfmo/html/perfmonpt2.asp - # My older explanation for this was that the "AddCounter" process forced - # the CPU to 100%, but the above makes more sense :) - path = win32pdh.MakeCounterPath( (machine,object,instance, None, inum,counter) ) - hq = win32pdh.OpenQuery() - try: - hc = win32pdh.AddCounter(hq, path) - try: - win32pdh.CollectQueryData(hq) - type, val = win32pdh.GetFormattedCounterValue(hc, format) - return val - finally: - win32pdh.RemoveCounter(hc) - finally: - win32pdh.CloseQuery(hq) - -def FindPerformanceAttributesByName(instanceName, object = None, - counter = None, - format = win32pdh.PDH_FMT_LONG, - machine = None, bRefresh=0): - """Find peformance attributes by (case insensitive) instance name. - - Given a process name, return a list with the requested attributes. - Most useful for returning a tuple of PIDs given a process name. - """ - if object is None: object = find_pdh_counter_localized_name("Process", machine) - if counter is None: counter = find_pdh_counter_localized_name("ID Process", machine) - if bRefresh: # PDH docs say this is how you do a refresh. - win32pdh.EnumObjects(None, machine, 0, 1) - instanceName = instanceName.lower() - items, instances = win32pdh.EnumObjectItems(None,None,object, -1) - # Track multiple instances. 
- instance_dict = {} - for instance in instances: - try: - instance_dict[instance] = instance_dict[instance] + 1 - except KeyError: - instance_dict[instance] = 0 - - ret = [] - for instance, max_instances in instance_dict.iteritems(): - for inum in xrange(max_instances+1): - if instance.lower() == instanceName: - ret.append(GetPerformanceAttributes(object, counter, - instance, inum, format, - machine)) - return ret - -def ShowAllProcesses(): - object = find_pdh_counter_localized_name("Process") - items, instances = win32pdh.EnumObjectItems(None,None,object, - win32pdh.PERF_DETAIL_WIZARD) - # Need to track multiple instances of the same name. - instance_dict = {} - for instance in instances: - try: - instance_dict[instance] = instance_dict[instance] + 1 - except KeyError: - instance_dict[instance] = 0 - - # Bit of a hack to get useful info. - items = [find_pdh_counter_localized_name("ID Process")] + items[:5] - print "Process Name", ",".join(items) - for instance, max_instances in instance_dict.iteritems(): - for inum in xrange(max_instances+1): - hq = win32pdh.OpenQuery() - hcs = [] - for item in items: - path = win32pdh.MakeCounterPath( (None,object,instance, - None, inum, item) ) - hcs.append(win32pdh.AddCounter(hq, path)) - win32pdh.CollectQueryData(hq) - # as per http://support.microsoft.com/default.aspx?scid=kb;EN-US;q262938, some "%" based - # counters need two collections - time.sleep(0.01) - win32pdh.CollectQueryData(hq) - print "%-15s\t" % (instance[:15]), - for hc in hcs: - type, val = win32pdh.GetFormattedCounterValue(hc, win32pdh.PDH_FMT_LONG) - print "%5d" % (val), - win32pdh.RemoveCounter(hc) - print - win32pdh.CloseQuery(hq) - -# NOTE: This BrowseCallback doesn't seem to work on Vista for markh. -# XXX - look at why!? -# Some counters on Vista require elevation, and callback would previously -# clear exceptions without printing them. 
-def BrowseCallBackDemo(counters): - ## BrowseCounters can now return multiple counter paths - for counter in counters: - machine, object, instance, parentInstance, index, counterName = \ - win32pdh.ParseCounterPath(counter) - - result = GetPerformanceAttributes(object, counterName, instance, index, - win32pdh.PDH_FMT_DOUBLE, machine) - print "Value of '%s' is" % counter, result - print "Added '%s' on object '%s' (machine %s), instance %s(%d)-parent of %s" \ - % (counterName, object, machine, instance, index, parentInstance) - return 0 - -def browse(callback = BrowseCallBackDemo, title="Python Browser", - level=win32pdh.PERF_DETAIL_WIZARD): - win32pdh.BrowseCounters(None,0, callback, level, title, ReturnMultiple=True) - -if __name__=='__main__': - ShowAllProcesses() - # Show how to get a couple of attributes by name. - counter = find_pdh_counter_localized_name("Virtual Bytes") - print "Virtual Bytes = ", FindPerformanceAttributesByName("python", - counter=counter) - print "Available Bytes = ", GetPerformanceAttributes( - find_pdh_counter_localized_name("Memory"), - find_pdh_counter_localized_name("Available Bytes")) - # And a browser. - print "Browsing for counters..." - browse() diff --git a/python/Lib/site-packages/win32/lib/win32rcparser.py b/python/Lib/site-packages/win32/lib/win32rcparser.py deleted file mode 100755 index 682ade940d..0000000000 --- a/python/Lib/site-packages/win32/lib/win32rcparser.py +++ /dev/null @@ -1,602 +0,0 @@ -# Windows dialog .RC file parser, by Adam Walker. - -# This module was adapted from the spambayes project, and is Copyright -# 2003/2004 The Python Software Foundation and is covered by the Python -# Software Foundation license. -""" -This is a parser for Windows .rc files, which are text files which define -dialogs and other Windows UI resources. 
-""" -__author__="Adam Walker" -__version__="0.11" - -import sys, os, shlex, stat -import pprint -import win32con -import commctrl - -_controlMap = {"DEFPUSHBUTTON":0x80, - "PUSHBUTTON":0x80, - "Button":0x80, - "GROUPBOX":0x80, - "Static":0x82, - "CTEXT":0x82, - "RTEXT":0x82, - "LTEXT":0x82, - "LISTBOX":0x83, - "SCROLLBAR":0x84, - "COMBOBOX":0x85, - "EDITTEXT":0x81, - "ICON":0x82, - "RICHEDIT":"RichEdit20A" - } - -# These are "default styles" for certain controls - ie, Visual Studio assumes -# the styles will be applied, and emits a "NOT {STYLE_NAME}" if it is to be -# disabled. These defaults have been determined by experimentation, so may -# not be completely accurate (most notably, some styles and/or control-types -# may be missing. -_addDefaults = {"EDITTEXT":win32con.WS_BORDER | win32con.WS_TABSTOP, - "GROUPBOX":win32con.BS_GROUPBOX, - "LTEXT":win32con.SS_LEFT, - "DEFPUSHBUTTON":win32con.BS_DEFPUSHBUTTON | win32con.WS_TABSTOP, - "PUSHBUTTON": win32con.WS_TABSTOP, - "CTEXT":win32con.SS_CENTER, - "RTEXT":win32con.SS_RIGHT, - "ICON":win32con.SS_ICON, - "LISTBOX":win32con.LBS_NOTIFY, - } - -defaultControlStyle = win32con.WS_CHILD | win32con.WS_VISIBLE -defaultControlStyleEx = 0 - -class DialogDef: - name = "" - id = 0 - style = 0 - styleEx = None - caption = "" - font = "MS Sans Serif" - fontSize = 8 - x = 0 - y = 0 - w = 0 - h = 0 - template = None - def __init__(self, n, i): - self.name = n - self.id = i - self.styles = [] - self.stylesEx = [] - self.controls = [] - #print "dialog def for ",self.name, self.id - def createDialogTemplate(self): - t = None - self.template = [[self.caption, - (self.x,self.y,self.w,self.h), - self.style, self.styleEx, - (self.fontSize, self.font)] - ] - # Add the controls - for control in self.controls: - self.template.append(control.createDialogTemplate()) - return self.template - -class ControlDef: - id = "" - controlType = "" - subType = "" - idNum = 0 - style = defaultControlStyle - styleEx = defaultControlStyleEx - label = "" - 
x = 0 - y = 0 - w = 0 - h = 0 - def __init__(self): - self.styles = [] - self.stylesEx = [] - def toString(self): - s = "" - return s - def createDialogTemplate(self): - ct = self.controlType - if "CONTROL"==ct: - ct = self.subType - if ct in _controlMap: - ct = _controlMap[ct] - t = [ct, self.label, self.idNum, (self.x, self.y, self.w, self.h), self.style, self.styleEx] - #print t - return t - -class StringDef: - def __init__(self, id, idNum, value): - self.id = id - self.idNum = idNum - self.value = value - - def __repr__(self): - return "StringDef(%r, %r, %r)" % (self.id, self.idNum, self.value) - -class RCParser: - next_id = 1001 - dialogs = {} - _dialogs = {} - debugEnabled = False - token = "" - - def __init__(self): - self.ungot = False - self.ids = {"IDC_STATIC": -1} - self.names = {-1:"IDC_STATIC"} - self.bitmaps = {} - self.stringTable = {} - self.icons = {} - - def debug(self, *args): - if self.debugEnabled: - print args - - def getToken(self): - if self.ungot: - self.ungot = False - self.debug("getToken returns (ungot):", self.token) - return self.token - self.token = self.lex.get_token() - self.debug("getToken returns:", self.token) - if self.token=="": - self.token = None - return self.token - - def ungetToken(self): - self.ungot = True - - def getCheckToken(self, expected): - tok = self.getToken() - assert tok == expected, "Expected token '%s', but got token '%s'!" % (expected, tok) - return tok - - def getCommaToken(self): - return self.getCheckToken(",") - - # Return the *current* token as a number, only consuming a token - # if it is the negative-sign. - def currentNumberToken(self): - mult = 1 - if self.token=='-': - mult = -1 - self.getToken() - return int(self.token) * mult - - # Return the *current* token as a string literal (ie, self.token will be a - # quote. consumes all tokens until the end of the string - def currentQuotedString(self): - # Handle quoted strings - pity shlex doesn't handle it. 
- assert self.token.startswith('"'), self.token - bits = [self.token] - while 1: - tok = self.getToken() - if not tok.startswith('"'): - self.ungetToken() - break - bits.append(tok) - sval = "".join(bits)[1:-1] # Remove end quotes. - # Fixup quotes in the body, and all (some?) quoted characters back - # to their raw value. - for i, o in ('""', '"'), ("\\r", "\r"), ("\\n", "\n"), ("\\t", "\t"): - sval = sval.replace(i, o) - return sval - - def load(self, rcstream): - """ - RCParser.loadDialogs(rcFileName) -> None - Load the dialog information into the parser. Dialog Definations can then be accessed - using the "dialogs" dictionary member (name->DialogDef). The "ids" member contains the dictionary of id->name. - The "names" member contains the dictionary of name->id - """ - self.open(rcstream) - self.getToken() - while self.token!=None: - self.parse() - self.getToken() - - def open(self, rcstream): - self.lex = shlex.shlex(rcstream) - self.lex.commenters = "//#" - - def parseH(self, file): - lex = shlex.shlex(file) - lex.commenters = "//" - token = " " - while token is not None: - token = lex.get_token() - if token == "" or token is None: - token = None - else: - if token=='define': - n = lex.get_token() - i = int(lex.get_token()) - self.ids[n] = i - if i in self.names: - # Dupe ID really isn't a problem - most consumers - # want to go from name->id, and this is OK. - # It means you can't go from id->name though. 
- pass - # ignore AppStudio special ones - #if not n.startswith("_APS_"): - # print "Duplicate id",i,"for",n,"is", self.names[i] - else: - self.names[i] = n - if self.next_id<=i: - self.next_id = i+1 - - def parse(self): - noid_parsers = { - "STRINGTABLE": self.parse_stringtable, - } - - id_parsers = { - "DIALOG" : self.parse_dialog, - "DIALOGEX": self.parse_dialog, -# "TEXTINCLUDE": self.parse_textinclude, - "BITMAP": self.parse_bitmap, - "ICON": self.parse_icon, - } - deep = 0 - base_token = self.token - rp = noid_parsers.get(base_token) - if rp is not None: - rp() - else: - # Not something we parse that isn't prefixed by an ID - # See if it is an ID prefixed item - if it is, our token - # is the resource ID. - resource_id = self.token - self.getToken() - if self.token is None: - return - - if "BEGIN" == self.token: - # A 'BEGIN' for a structure we don't understand - skip to the - # matching 'END' - deep = 1 - while deep!=0 and self.token is not None: - self.getToken() - self.debug("Zooming over", self.token) - if "BEGIN" == self.token: - deep += 1 - elif "END" == self.token: - deep -= 1 - else: - rp = id_parsers.get(self.token) - if rp is not None: - self.debug("Dispatching '%s'" % (self.token,)) - rp(resource_id) - else: - # We don't know what the resource type is, but we - # have already consumed the next, which can cause problems, - # so push it back. 
- self.debug("Skipping top-level '%s'" % base_token) - self.ungetToken() - - def addId(self, id_name): - if id_name in self.ids: - id = self.ids[id_name] - else: - # IDOK, IDCANCEL etc are special - if a real resource has this value - for n in ["IDOK","IDCANCEL","IDYES","IDNO", "IDABORT"]: - if id_name == n: - v = getattr(win32con, n) - self.ids[n] = v - self.names[v] = n - return v - id = self.next_id - self.next_id += 1 - self.ids[id_name] = id - self.names[id] = id_name - return id - - def lang(self): - while self.token[0:4]=="LANG" or self.token[0:7]=="SUBLANG" or self.token==',': - self.getToken(); - - def parse_textinclude(self, res_id): - while self.getToken() != "BEGIN": - pass - while 1: - if self.token == "END": - break - s = self.getToken() - - def parse_stringtable(self): - while self.getToken() != "BEGIN": - pass - while 1: - self.getToken() - if self.token == "END": - break - sid = self.token - self.getToken() - sd = StringDef(sid, self.addId(sid), self.currentQuotedString()) - self.stringTable[sid] = sd - - def parse_bitmap(self, name): - return self.parse_bitmap_or_icon(name, self.bitmaps) - - def parse_icon(self, name): - return self.parse_bitmap_or_icon(name, self.icons) - - def parse_bitmap_or_icon(self, name, dic): - self.getToken() - while not self.token.startswith('"'): - self.getToken() - bmf = self.token[1:-1] # quotes - dic[name] = bmf - - def parse_dialog(self, name): - dlg = DialogDef(name,self.addId(name)) - assert len(dlg.controls)==0 - self._dialogs[name] = dlg - extras = [] - self.getToken() - while not self.token.isdigit(): - self.debug("extra", self.token) - extras.append(self.token) - self.getToken() - dlg.x = int(self.token) - self.getCommaToken() - self.getToken() # number - dlg.y = int(self.token) - self.getCommaToken() - self.getToken() # number - dlg.w = int(self.token) - self.getCommaToken() - self.getToken() # number - dlg.h = int(self.token) - self.getToken() - while not (self.token==None or self.token=="" or 
self.token=="END"): - if self.token=="STYLE": - self.dialogStyle(dlg) - elif self.token=="EXSTYLE": - self.dialogExStyle(dlg) - elif self.token=="CAPTION": - self.dialogCaption(dlg) - elif self.token=="FONT": - self.dialogFont(dlg) - elif self.token=="BEGIN": - self.controls(dlg) - else: - break - self.dialogs[name] = dlg.createDialogTemplate() - - def dialogStyle(self, dlg): - dlg.style, dlg.styles = self.styles( [], win32con.DS_SETFONT) - def dialogExStyle(self, dlg): - self.getToken() - dlg.styleEx, dlg.stylesEx = self.styles( [], 0) - - def styles(self, defaults, defaultStyle): - list = defaults - style = defaultStyle - - if "STYLE"==self.token: - self.getToken() - i = 0 - Not = False - while ((i%2==1 and ("|"==self.token or "NOT"==self.token)) or (i%2==0)) and not self.token==None: - Not = False; - if "NOT"==self.token: - Not = True - self.getToken() - i += 1 - if self.token!="|": - if self.token in win32con.__dict__: - value = getattr(win32con,self.token) - else: - if self.token in commctrl.__dict__: - value = getattr(commctrl,self.token) - else: - value = 0 - if Not: - list.append("NOT "+self.token) - self.debug("styles add Not",self.token, value) - style &= ~value - else: - list.append(self.token) - self.debug("styles add", self.token, value) - style |= value - self.getToken() - self.debug("style is ",style) - - return style, list - - def dialogCaption(self, dlg): - if "CAPTION"==self.token: - self.getToken() - self.token = self.token[1:-1] - self.debug("Caption is:",self.token) - dlg.caption = self.token - self.getToken() - def dialogFont(self, dlg): - if "FONT"==self.token: - self.getToken() - dlg.fontSize = int(self.token) - self.getCommaToken() - self.getToken() # Font name - dlg.font = self.token[1:-1] # it's quoted - self.getToken() - while "BEGIN"!=self.token: - self.getToken() - def controls(self, dlg): - if self.token=="BEGIN": self.getToken() - # All controls look vaguely like: - # TYPE [text, ] Control_id, l, t, r, b [, style] - # .rc parser 
documents all control types as: - # CHECKBOX, COMBOBOX, CONTROL, CTEXT, DEFPUSHBUTTON, EDITTEXT, GROUPBOX, - # ICON, LISTBOX, LTEXT, PUSHBUTTON, RADIOBUTTON, RTEXT, SCROLLBAR - without_text = ["EDITTEXT", "COMBOBOX", "LISTBOX", "SCROLLBAR"] - while self.token!="END": - control = ControlDef() - control.controlType = self.token; - self.getToken() - if control.controlType not in without_text: - if self.token[0:1]=='"': - control.label = self.currentQuotedString() - # Some funny controls, like icons and picture controls use - # the "window text" as extra resource ID (ie, the ID of the - # icon itself). This may be either a literal, or an ID string. - elif self.token=="-" or self.token.isdigit(): - control.label = str(self.currentNumberToken()) - else: - # An ID - use the numeric equiv. - control.label = str(self.addId(self.token)) - self.getCommaToken() - self.getToken() - # Control IDs may be "names" or literal ints - if self.token=="-" or self.token.isdigit(): - control.id = self.currentNumberToken() - control.idNum = control.id - else: - # name of an ID - control.id = self.token - control.idNum = self.addId(control.id) - self.getCommaToken() - - if control.controlType == "CONTROL": - self.getToken() - control.subType = self.token[1:-1] - thisDefaultStyle = defaultControlStyle | \ - _addDefaults.get(control.subType, 0) - # Styles - self.getCommaToken() - self.getToken() - control.style, control.styles = self.styles([], thisDefaultStyle) - else: - thisDefaultStyle = defaultControlStyle | \ - _addDefaults.get(control.controlType, 0) - # incase no style is specified. 
- control.style = thisDefaultStyle - # Rect - control.x = int(self.getToken()) - self.getCommaToken() - control.y = int(self.getToken()) - self.getCommaToken() - control.w = int(self.getToken()) - self.getCommaToken() - self.getToken() - control.h = int(self.token) - self.getToken() - if self.token==",": - self.getToken() - control.style, control.styles = self.styles([], thisDefaultStyle) - if self.token==",": - self.getToken() - control.styleEx, control.stylesEx = self.styles([], defaultControlStyleEx) - #print control.toString() - dlg.controls.append(control) - -def ParseStreams(rc_file, h_file): - rcp = RCParser() - if h_file: - rcp.parseH(h_file) - try: - rcp.load(rc_file) - except: - lex = getattr(rcp, "lex", None) - if lex: - print "ERROR parsing dialogs at line", lex.lineno - print "Next 10 tokens are:" - for i in range(10): - print lex.get_token(), - print - raise - return rcp - -def Parse(rc_name, h_name = None): - if h_name: - h_file = open(h_name, "rU") - else: - # See if same basename as the .rc - h_name = rc_name[:-2]+"h" - try: - h_file = open(h_name, "rU") - except IOError: - # See if MSVC default of 'resource.h' in the same dir. - h_name = os.path.join(os.path.dirname(rc_name), "resource.h") - try: - h_file = open(h_name, "rU") - except IOError: - # .h files are optional anyway - h_file = None - rc_file = open(rc_name, "rU") - try: - return ParseStreams(rc_file, h_file) - finally: - if h_file is not None: - h_file.close() - rc_file.close() - return rcp - -def GenerateFrozenResource(rc_name, output_name, h_name = None): - """Converts an .rc windows resource source file into a python source file - with the same basic public interface as the rest of this module. - Particularly useful for py2exe or other 'freeze' type solutions, - where a frozen .py file can be used inplace of a real .rc file. 
- """ - rcp = Parse(rc_name, h_name) - in_stat = os.stat(rc_name) - - out = open(output_name, "wt") - out.write("#%s\n" % output_name) - out.write("#This is a generated file. Please edit %s instead.\n" % rc_name) - out.write("__version__=%r\n" % __version__) - out.write("_rc_size_=%d\n_rc_mtime_=%d\n" % (in_stat[stat.ST_SIZE], in_stat[stat.ST_MTIME])) - - out.write("class StringDef:\n") - out.write("\tdef __init__(self, id, idNum, value):\n") - out.write("\t\tself.id = id\n") - out.write("\t\tself.idNum = idNum\n") - out.write("\t\tself.value = value\n") - out.write("\tdef __repr__(self):\n") - out.write("\t\treturn \"StringDef(%r, %r, %r)\" % (self.id, self.idNum, self.value)\n") - - out.write("class FakeParser:\n") - - for name in "dialogs", "ids", "names", "bitmaps", "icons", "stringTable": - out.write("\t%s = \\\n" % (name,)) - pprint.pprint(getattr(rcp, name), out) - out.write("\n") - - out.write("def Parse(s):\n") - out.write("\treturn FakeParser()\n") - out.close() - -if __name__=='__main__': - if len(sys.argv) <= 1: - print __doc__ - print - print "See test_win32rcparser.py, and the win32rcparser directory (both" - print "in the test suite) for an example of this module's usage." 
- else: - import pprint - filename = sys.argv[1] - if "-v" in sys.argv: - RCParser.debugEnabled = 1 - print "Dumping all resources in '%s'" % filename - resources = Parse(filename) - for id, ddef in resources.dialogs.iteritems(): - print "Dialog %s (%d controls)" % (id, len(ddef)) - pprint.pprint(ddef) - print - for id, sdef in resources.stringTable.iteritems(): - print "String %s=%r" % (id, sdef.value) - print - for id, sdef in resources.bitmaps.iteritems(): - print "Bitmap %s=%r" % (id, sdef) - print - for id, sdef in resources.icons.iteritems(): - print "Icon %s=%r" % (id, sdef) - print diff --git a/python/Lib/site-packages/win32/lib/win32serviceutil.py b/python/Lib/site-packages/win32/lib/win32serviceutil.py deleted file mode 100755 index 656923e51d..0000000000 --- a/python/Lib/site-packages/win32/lib/win32serviceutil.py +++ /dev/null @@ -1,839 +0,0 @@ -# General purpose service utilities, both for standard Python scripts, -# and for for Python programs which run as services... -# -# Note that most utility functions here will raise win32api.error's -# (which is == win32service.error, pywintypes.error, etc) -# when things go wrong - eg, not enough permissions to hit the -# registry etc. - -import win32service, win32api, win32con, winerror -import sys, pywintypes, os, warnings -error = RuntimeError - -def LocatePythonServiceExe(exeName = None): - if not exeName and hasattr(sys, "frozen"): - # If py2exe etc calls this with no exeName, default is current exe. - return sys.executable - - # Try and find the specified EXE somewhere. If specifically registered, - # use it. Otherwise look down sys.path, and the global PATH environment. 
- if exeName is None: - if os.path.splitext(win32service.__file__)[0].endswith("_d"): - exeName = "PythonService_d.exe" - else: - exeName = "PythonService.exe" - # See if it exists as specified - if os.path.isfile(exeName): return win32api.GetFullPathName(exeName) - baseName = os.path.splitext(os.path.basename(exeName))[0] - try: - exeName = win32api.RegQueryValue(win32con.HKEY_LOCAL_MACHINE, - "Software\\Python\\%s\\%s" % (baseName, sys.winver)) - if os.path.isfile(exeName): - return exeName - raise RuntimeError("The executable '%s' is registered as the Python " \ - "service exe, but it does not exist as specified" \ - % exeName) - except win32api.error: - # OK - not there - lets go a-searchin' - for path in [sys.prefix] + sys.path: - look = os.path.join(path, exeName) - if os.path.isfile(look): - return win32api.GetFullPathName(look) - # Try the global Path. - try: - return win32api.SearchPath(None, exeName)[0] - except win32api.error: - msg = "%s is not correctly registered\nPlease locate and run %s, and it will self-register\nThen run this service registration process again." % (exeName, exeName) - raise error(msg) - -def _GetServiceShortName(longName): - # looks up a services name - # from the display name - # Thanks to Andy McKay for this code. 
- access = win32con.KEY_READ | win32con.KEY_ENUMERATE_SUB_KEYS | win32con.KEY_QUERY_VALUE - hkey = win32api.RegOpenKey(win32con.HKEY_LOCAL_MACHINE, "SYSTEM\\CurrentControlSet\\Services", 0, access) - num = win32api.RegQueryInfoKey(hkey)[0] - longName = longName.lower() - # loop through number of subkeys - for x in range(0, num): - # find service name, open subkey - svc = win32api.RegEnumKey(hkey, x) - skey = win32api.RegOpenKey(hkey, svc, 0, access) - try: - # find display name - thisName = str(win32api.RegQueryValueEx(skey, "DisplayName")[0]) - if thisName.lower() == longName: - return svc - except win32api.error: - # in case there is no key called DisplayName - pass - return None - -# Open a service given either it's long or short name. -def SmartOpenService(hscm, name, access): - try: - return win32service.OpenService(hscm, name, access) - except win32api.error, details: - if details.winerror not in [winerror.ERROR_SERVICE_DOES_NOT_EXIST, - winerror.ERROR_INVALID_NAME]: - raise - name = win32service.GetServiceKeyName(hscm, name) - return win32service.OpenService(hscm, name, access) - -def LocateSpecificServiceExe(serviceName): - # Given the name of a specific service, return the .EXE name _it_ uses - # (which may or may not be the Python Service EXE - hkey = win32api.RegOpenKey(win32con.HKEY_LOCAL_MACHINE, "SYSTEM\\CurrentControlSet\\Services\\%s" % (serviceName), 0, win32con.KEY_ALL_ACCESS) - try: - return win32api.RegQueryValueEx(hkey, "ImagePath")[0] - finally: - hkey.Close() - -def InstallPerfmonForService(serviceName, iniName, dllName = None): - # If no DLL name, look it up in the INI file name - if not dllName: # May be empty string! 
- dllName = win32api.GetProfileVal("Python", "dll", "", iniName) - # Still not found - look for the standard one in the same dir as win32service.pyd - if not dllName: - try: - tryName = os.path.join(os.path.split(win32service.__file__)[0], "perfmondata.dll") - if os.path.isfile(tryName): - dllName = tryName - except AttributeError: - # Frozen app? - anyway, can't find it! - pass - if not dllName: - raise ValueError("The name of the performance DLL must be available") - dllName = win32api.GetFullPathName(dllName) - # Now setup all the required "Performance" entries. - hkey = win32api.RegOpenKey(win32con.HKEY_LOCAL_MACHINE, "SYSTEM\\CurrentControlSet\\Services\\%s" % (serviceName), 0, win32con.KEY_ALL_ACCESS) - try: - subKey = win32api.RegCreateKey(hkey, "Performance") - try: - win32api.RegSetValueEx(subKey, "Library", 0, win32con.REG_SZ, dllName) - win32api.RegSetValueEx(subKey, "Open", 0, win32con.REG_SZ, "OpenPerformanceData") - win32api.RegSetValueEx(subKey, "Close", 0, win32con.REG_SZ, "ClosePerformanceData") - win32api.RegSetValueEx(subKey, "Collect", 0, win32con.REG_SZ, "CollectPerformanceData") - finally: - win32api.RegCloseKey(subKey) - finally: - win32api.RegCloseKey(hkey) - # Now do the "Lodctr" thang... 
- - try: - import perfmon - path, fname = os.path.split(iniName) - oldPath = os.getcwd() - if path: - os.chdir(path) - try: - perfmon.LoadPerfCounterTextStrings("python.exe " + fname) - finally: - os.chdir(oldPath) - except win32api.error, details: - print "The service was installed OK, but the performance monitor" - print "data could not be loaded.", details - -def _GetCommandLine(exeName, exeArgs): - if exeArgs is not None: - return exeName + " " + exeArgs - else: - return exeName - -def InstallService(pythonClassString, serviceName, displayName, startType = None, errorControl = None, bRunInteractive = 0, serviceDeps = None, userName = None, password = None, exeName = None, perfMonIni = None, perfMonDll = None, exeArgs = None, - description = None, delayedstart = None): - # Handle the default arguments. - if startType is None: - startType = win32service.SERVICE_DEMAND_START - serviceType = win32service.SERVICE_WIN32_OWN_PROCESS - if bRunInteractive: - serviceType = serviceType | win32service.SERVICE_INTERACTIVE_PROCESS - if errorControl is None: - errorControl = win32service.SERVICE_ERROR_NORMAL - - exeName = '"%s"' % LocatePythonServiceExe(exeName) # None here means use default PythonService.exe - commandLine = _GetCommandLine(exeName, exeArgs) - hscm = win32service.OpenSCManager(None,None,win32service.SC_MANAGER_ALL_ACCESS) - try: - hs = win32service.CreateService(hscm, - serviceName, - displayName, - win32service.SERVICE_ALL_ACCESS, # desired access - serviceType, # service type - startType, - errorControl, # error control type - commandLine, - None, - 0, - serviceDeps, - userName, - password) - if description is not None: - try: - win32service.ChangeServiceConfig2(hs,win32service.SERVICE_CONFIG_DESCRIPTION,description) - except NotImplementedError: - pass ## ChangeServiceConfig2 and description do not exist on NT - if delayedstart is not None: - try: - win32service.ChangeServiceConfig2(hs,win32service.SERVICE_CONFIG_DELAYED_AUTO_START_INFO, delayedstart) - 
except (win32service.error, NotImplementedError): - ## delayed start only exists on Vista and later - warn only when trying to set delayed to True - if delayedstart: - warnings.warn('Delayed Start not available on this system') - win32service.CloseServiceHandle(hs) - finally: - win32service.CloseServiceHandle(hscm) - InstallPythonClassString(pythonClassString, serviceName) - # If I have performance monitor info to install, do that. - if perfMonIni is not None: - InstallPerfmonForService(serviceName, perfMonIni, perfMonDll) - -def ChangeServiceConfig(pythonClassString, serviceName, startType = None, errorControl = None, bRunInteractive = 0, - serviceDeps = None, userName = None, password = None, - exeName = None, displayName = None, perfMonIni = None, perfMonDll = None, - exeArgs = None, description = None, delayedstart = None): - # Before doing anything, remove any perfmon counters. - try: - import perfmon - perfmon.UnloadPerfCounterTextStrings("python.exe "+serviceName) - except (ImportError, win32api.error): - pass - - # The EXE location may have changed - exeName = '"%s"' % LocatePythonServiceExe(exeName) - - # Handle the default arguments. 
- if startType is None: startType = win32service.SERVICE_NO_CHANGE - if errorControl is None: errorControl = win32service.SERVICE_NO_CHANGE - - hscm = win32service.OpenSCManager(None,None,win32service.SC_MANAGER_ALL_ACCESS) - serviceType = win32service.SERVICE_WIN32_OWN_PROCESS - if bRunInteractive: - serviceType = serviceType | win32service.SERVICE_INTERACTIVE_PROCESS - commandLine = _GetCommandLine(exeName, exeArgs) - try: - hs = SmartOpenService(hscm, serviceName, win32service.SERVICE_ALL_ACCESS) - try: - - win32service.ChangeServiceConfig(hs, - serviceType, # service type - startType, - errorControl, # error control type - commandLine, - None, - 0, - serviceDeps, - userName, - password, - displayName) - if description is not None: - try: - win32service.ChangeServiceConfig2(hs,win32service.SERVICE_CONFIG_DESCRIPTION,description) - except NotImplementedError: - pass ## ChangeServiceConfig2 and description do not exist on NT - if delayedstart is not None: - try: - win32service.ChangeServiceConfig2(hs,win32service.SERVICE_CONFIG_DELAYED_AUTO_START_INFO, delayedstart) - except (win32service.error, NotImplementedError): - ## Delayed start only exists on Vista and later. On Nt, will raise NotImplementedError since ChangeServiceConfig2 - ## doensn't exist. On Win2k and XP, will fail with ERROR_INVALID_LEVEL - ## Warn only if trying to set delayed to True - if delayedstart: - warnings.warn('Delayed Start not available on this system') - finally: - win32service.CloseServiceHandle(hs) - finally: - win32service.CloseServiceHandle(hscm) - InstallPythonClassString(pythonClassString, serviceName) - # If I have performance monitor info to install, do that. - if perfMonIni is not None: - InstallPerfmonForService(serviceName, perfMonIni, perfMonDll) - -def InstallPythonClassString(pythonClassString, serviceName): - # Now setup our Python specific entries. 
- if pythonClassString: - key = win32api.RegCreateKey(win32con.HKEY_LOCAL_MACHINE, "System\\CurrentControlSet\\Services\\%s\\PythonClass" % serviceName) - try: - win32api.RegSetValue(key, None, win32con.REG_SZ, pythonClassString); - finally: - win32api.RegCloseKey(key) - -# Utility functions for Services, to allow persistant properties. -def SetServiceCustomOption(serviceName, option, value): - try: - serviceName = serviceName._svc_name_ - except AttributeError: - pass - key = win32api.RegCreateKey(win32con.HKEY_LOCAL_MACHINE, "System\\CurrentControlSet\\Services\\%s\\Parameters" % serviceName) - try: - if type(value)==type(0): - win32api.RegSetValueEx(key, option, 0, win32con.REG_DWORD, value); - else: - win32api.RegSetValueEx(key, option, 0, win32con.REG_SZ, value); - finally: - win32api.RegCloseKey(key) - -def GetServiceCustomOption(serviceName, option, defaultValue = None): - # First param may also be a service class/instance. - # This allows services to pass "self" - try: - serviceName = serviceName._svc_name_ - except AttributeError: - pass - key = win32api.RegCreateKey(win32con.HKEY_LOCAL_MACHINE, "System\\CurrentControlSet\\Services\\%s\\Parameters" % serviceName) - try: - try: - return win32api.RegQueryValueEx(key, option)[0] - except win32api.error: # No value. 
- return defaultValue - finally: - win32api.RegCloseKey(key) - - -def RemoveService(serviceName): - try: - import perfmon - perfmon.UnloadPerfCounterTextStrings("python.exe "+serviceName) - except (ImportError, win32api.error): - pass - - hscm = win32service.OpenSCManager(None,None,win32service.SC_MANAGER_ALL_ACCESS) - try: - hs = SmartOpenService(hscm, serviceName, win32service.SERVICE_ALL_ACCESS) - win32service.DeleteService(hs) - win32service.CloseServiceHandle(hs) - finally: - win32service.CloseServiceHandle(hscm) - - import win32evtlogutil - try: - win32evtlogutil.RemoveSourceFromRegistry(serviceName) - except win32api.error: - pass - -def ControlService(serviceName, code, machine = None): - hscm = win32service.OpenSCManager(machine,None,win32service.SC_MANAGER_ALL_ACCESS) - try: - - hs = SmartOpenService(hscm, serviceName, win32service.SERVICE_ALL_ACCESS) - try: - status = win32service.ControlService(hs, code) - finally: - win32service.CloseServiceHandle(hs) - finally: - win32service.CloseServiceHandle(hscm) - return status - -def __FindSvcDeps(findName): - if type(findName) is pywintypes.UnicodeType: findName = str(findName) - dict = {} - k = win32api.RegOpenKey(win32con.HKEY_LOCAL_MACHINE, "SYSTEM\\CurrentControlSet\\Services") - num = 0 - while 1: - try: - svc = win32api.RegEnumKey(k, num) - except win32api.error: - break - num = num + 1 - sk = win32api.RegOpenKey(k, svc) - try: - deps, typ = win32api.RegQueryValueEx(sk, "DependOnService") - except win32api.error: - deps = () - for dep in deps: - dep = dep.lower() - dep_on = dict.get(dep, []) - dep_on.append(svc) - dict[dep]=dep_on - - return __ResolveDeps(findName, dict) - - -def __ResolveDeps(findName, dict): - items = dict.get(findName.lower(), []) - retList = [] - for svc in items: - retList.insert(0, svc) - retList = __ResolveDeps(svc, dict) + retList - return retList - -def WaitForServiceStatus(serviceName, status, waitSecs, machine=None): - """Waits for the service to return the specified status. 
You - should have already requested the service to enter that state""" - for i in range(waitSecs*4): - now_status = QueryServiceStatus(serviceName, machine)[1] - if now_status == status: - break - win32api.Sleep(250) - else: - raise pywintypes.error(winerror.ERROR_SERVICE_REQUEST_TIMEOUT, "QueryServiceStatus", win32api.FormatMessage(winerror.ERROR_SERVICE_REQUEST_TIMEOUT)[:-2]) - -def __StopServiceWithTimeout(hs, waitSecs = 30): - try: - status = win32service.ControlService(hs, win32service.SERVICE_CONTROL_STOP) - except pywintypes.error, exc: - if exc.winerror!=winerror.ERROR_SERVICE_NOT_ACTIVE: - raise - for i in range(waitSecs): - status = win32service.QueryServiceStatus(hs) - if status[1] == win32service.SERVICE_STOPPED: - break - win32api.Sleep(1000) - else: - raise pywintypes.error(winerror.ERROR_SERVICE_REQUEST_TIMEOUT, "ControlService", win32api.FormatMessage(winerror.ERROR_SERVICE_REQUEST_TIMEOUT)[:-2]) - - -def StopServiceWithDeps(serviceName, machine = None, waitSecs = 30): - # Stop a service recursively looking for dependant services - hscm = win32service.OpenSCManager(machine,None,win32service.SC_MANAGER_ALL_ACCESS) - try: - deps = __FindSvcDeps(serviceName) - for dep in deps: - hs = win32service.OpenService(hscm, dep, win32service.SERVICE_ALL_ACCESS) - try: - __StopServiceWithTimeout(hs, waitSecs) - finally: - win32service.CloseServiceHandle(hs) - # Now my service! 
- hs = win32service.OpenService(hscm, serviceName, win32service.SERVICE_ALL_ACCESS) - try: - __StopServiceWithTimeout(hs, waitSecs) - finally: - win32service.CloseServiceHandle(hs) - - finally: - win32service.CloseServiceHandle(hscm) - - -def StopService(serviceName, machine = None): - return ControlService(serviceName, win32service.SERVICE_CONTROL_STOP, machine) - -def StartService(serviceName, args = None, machine = None): - hscm = win32service.OpenSCManager(machine,None,win32service.SC_MANAGER_ALL_ACCESS) - try: - - hs = SmartOpenService(hscm, serviceName, win32service.SERVICE_ALL_ACCESS) - try: - win32service.StartService(hs, args) - finally: - win32service.CloseServiceHandle(hs) - finally: - win32service.CloseServiceHandle(hscm) - -def RestartService(serviceName, args = None, waitSeconds = 30, machine = None): - "Stop the service, and then start it again (with some tolerance for allowing it to stop.)" - try: - StopService(serviceName, machine) - except pywintypes.error, exc: - # Allow only "service not running" error - if exc.winerror!=winerror.ERROR_SERVICE_NOT_ACTIVE: - raise - # Give it a few goes, as the service may take time to stop - for i in range(waitSeconds): - try: - StartService(serviceName, args, machine) - break - except pywintypes.error, exc: - if exc.winerror!=winerror.ERROR_SERVICE_ALREADY_RUNNING: - raise - win32api.Sleep(1000) - else: - print "Gave up waiting for the old service to stop!" - -def _DebugCtrlHandler(evt): - if evt in (win32con.CTRL_C_EVENT, win32con.CTRL_BREAK_EVENT): - assert g_debugService - print "Stopping debug service." - g_debugService.SvcStop() - return True - return False - -def DebugService(cls, argv = []): - # Run a service in "debug" mode. Re-implements what pythonservice.exe - # does when it sees a "-debug" param. 
- # Currently only used by "frozen" (ie, py2exe) programs (but later may - # end up being used for all services should we ever remove - # pythonservice.exe) - import servicemanager - global g_debugService - - print "Debugging service %s - press Ctrl+C to stop." % (cls._svc_name_,) - servicemanager.Debugging(True) - servicemanager.PrepareToHostSingle(cls) - g_debugService = cls(argv) - # Setup a ctrl+c handler to simulate a "stop" - win32api.SetConsoleCtrlHandler(_DebugCtrlHandler, True) - try: - g_debugService.SvcRun() - finally: - win32api.SetConsoleCtrlHandler(_DebugCtrlHandler, False) - servicemanager.Debugging(False) - g_debugService = None - -def GetServiceClassString(cls, argv = None): - if argv is None: - argv = sys.argv - import pickle - modName = pickle.whichmodule(cls, cls.__name__) - if modName == '__main__': - try: - fname = win32api.GetFullPathName(argv[0]) - path = os.path.split(fname)[0] - # Eaaaahhhh - sometimes this will be a short filename, which causes - # problems with 1.5.1 and the silly filename case rule. - # Get the long name - fname = os.path.join(path, win32api.FindFiles(fname)[0][8]) - except win32api.error: - raise error("Could not resolve the path name '%s' to a full path" % (argv[0])) - modName = os.path.splitext(fname)[0] - return modName + "." 
+ cls.__name__ - -def QueryServiceStatus(serviceName, machine=None): - hscm = win32service.OpenSCManager(machine,None,win32service.SC_MANAGER_CONNECT) - try: - - hs = SmartOpenService(hscm, serviceName, win32service.SERVICE_QUERY_STATUS) - try: - status = win32service.QueryServiceStatus(hs) - finally: - win32service.CloseServiceHandle(hs) - finally: - win32service.CloseServiceHandle(hscm) - return status - -def usage(): - try: - fname = os.path.split(sys.argv[0])[1] - except: - fname = sys.argv[0] - print "Usage: '%s [options] install|update|remove|start [...]|stop|restart [...]|debug [...]'" % fname - print "Options for 'install' and 'update' commands only:" - print " --username domain\\username : The Username the service is to run under" - print " --password password : The password for the username" - print " --startup [manual|auto|disabled|delayed] : How the service starts, default = manual" - print " --interactive : Allow the service to interact with the desktop." - print " --perfmonini file: .ini file to use for registering performance monitor data" - print " --perfmondll file: .dll file to use when querying the service for" - print " performance data, default = perfmondata.dll" - print "Options for 'start' and 'stop' commands only:" - print " --wait seconds: Wait for the service to actually start or stop." - print " If you specify --wait with the 'stop' option, the service" - print " and all dependent services will be stopped, each waiting" - print " the specified period." - sys.exit(1) - -def HandleCommandLine(cls, serviceClassString = None, argv = None, customInstallOptions = "", customOptionHandler = None): - """Utility function allowing services to process the command line. - - Allows standard commands such as 'start', 'stop', 'debug', 'install' etc. - - Install supports 'standard' command line options prefixed with '--', such as - --username, --password, etc. 
In addition, - the function allows custom command line options to be handled by the calling function. - """ - err = 0 - - if argv is None: argv = sys.argv - - if len(argv)<=1: - usage() - - serviceName = cls._svc_name_ - serviceDisplayName = cls._svc_display_name_ - if serviceClassString is None: - serviceClassString = GetServiceClassString(cls) - - # Pull apart the command line - import getopt - try: - opts, args = getopt.getopt(argv[1:], customInstallOptions,["password=","username=","startup=","perfmonini=", "perfmondll=", "interactive", "wait="]) - except getopt.error, details: - print details - usage() - userName = None - password = None - perfMonIni = perfMonDll = None - startup = None - delayedstart = None - interactive = None - waitSecs = 0 - for opt, val in opts: - if opt=='--username': - userName = val - elif opt=='--password': - password = val - elif opt=='--perfmonini': - perfMonIni = val - elif opt=='--perfmondll': - perfMonDll = val - elif opt=='--interactive': - interactive = 1 - elif opt=='--startup': - map = {"manual": win32service.SERVICE_DEMAND_START, - "auto" : win32service.SERVICE_AUTO_START, - "delayed": win32service.SERVICE_AUTO_START, ## ChangeServiceConfig2 called later - "disabled": win32service.SERVICE_DISABLED} - try: - startup = map[val.lower()] - except KeyError: - print "'%s' is not a valid startup option" % val - if val.lower() == "delayed": - delayedstart = True - elif val.lower() == "auto": - delayedstart = False - ## else no change - elif opt=='--wait': - try: - waitSecs = int(val) - except ValueError: - print "--wait must specify an integer number of seconds." 
- usage() - - arg=args[0] - knownArg = 0 - # First we process all arguments which pass additional args on - if arg=="start": - knownArg = 1 - print "Starting service %s" % (serviceName) - try: - StartService(serviceName, args[1:]) - if waitSecs: - WaitForServiceStatus(serviceName, win32service.SERVICE_RUNNING, waitSecs) - except win32service.error, exc: - print "Error starting service: %s" % exc.strerror - err = exc.winerror - - elif arg=="restart": - knownArg = 1 - print "Restarting service %s" % (serviceName) - RestartService(serviceName, args[1:]) - if waitSecs: - WaitForServiceStatus(serviceName, win32service.SERVICE_RUNNING, waitSecs) - - elif arg=="debug": - knownArg = 1 - if not hasattr(sys, "frozen"): - # non-frozen services use pythonservice.exe which handles a - # -debug option - svcArgs = " ".join(args[1:]) - try: - exeName = LocateSpecificServiceExe(serviceName) - except win32api.error, exc: - if exc[0] == winerror.ERROR_FILE_NOT_FOUND: - print "The service does not appear to be installed." - print "Please install the service before debugging it." - sys.exit(1) - raise - try: - os.system("%s -debug %s %s" % (exeName, serviceName, svcArgs)) - # ^C is used to kill the debug service. Sometimes Python also gets - # interrupted - ignore it... - except KeyboardInterrupt: - pass - else: - # py2exe services don't use pythonservice - so we simulate - # debugging here. 
- DebugService(cls, args) - - if not knownArg and len(args)!=1: - usage() # the rest of the cmds don't take addn args - - if arg=="install": - knownArg = 1 - try: - serviceDeps = cls._svc_deps_ - except AttributeError: - serviceDeps = None - try: - exeName = cls._exe_name_ - except AttributeError: - exeName = None # Default to PythonService.exe - try: - exeArgs = cls._exe_args_ - except AttributeError: - exeArgs = None - try: - description = cls._svc_description_ - except AttributeError: - description = None - print "Installing service %s" % (serviceName,) - # Note that we install the service before calling the custom option - # handler, so if the custom handler fails, we have an installed service (from NT's POV) - # but is unlikely to work, as the Python code controlling it failed. Therefore - # we remove the service if the first bit works, but the second doesnt! - try: - InstallService(serviceClassString, serviceName, serviceDisplayName, serviceDeps = serviceDeps, startType=startup, bRunInteractive=interactive, userName=userName,password=password, exeName=exeName, perfMonIni=perfMonIni,perfMonDll=perfMonDll,exeArgs=exeArgs, - description=description, delayedstart=delayedstart) - if customOptionHandler: - customOptionHandler(*(opts,)) - print "Service installed" - except win32service.error, exc: - if exc.winerror==winerror.ERROR_SERVICE_EXISTS: - arg = "update" # Fall through to the "update" param! - else: - print "Error installing service: %s (%d)" % (exc.strerror, exc.winerror) - err = exc.winerror - except ValueError, msg: # Can be raised by custom option handler. - print "Error installing service: %s" % str(msg) - err = -1 - # xxx - maybe I should remove after _any_ failed install - however, - # xxx - it may be useful to help debug to leave the service as it failed. - # xxx - We really _must_ remove as per the comments above... - # As we failed here, remove the service, so the next installation - # attempt works. 
- try: - RemoveService(serviceName) - except win32api.error: - print "Warning - could not remove the partially installed service." - - if arg == "update": - knownArg = 1 - try: - serviceDeps = cls._svc_deps_ - except AttributeError: - serviceDeps = None - try: - exeName = cls._exe_name_ - except AttributeError: - exeName = None # Default to PythonService.exe - try: - exeArgs = cls._exe_args_ - except AttributeError: - exeArgs = None - try: - description=cls._svc_description_ - except AttributeError: - description=None - print "Changing service configuration" - try: - ChangeServiceConfig(serviceClassString, serviceName, serviceDeps = serviceDeps, startType=startup, bRunInteractive=interactive, userName=userName,password=password, exeName=exeName, displayName = serviceDisplayName, perfMonIni=perfMonIni,perfMonDll=perfMonDll,exeArgs=exeArgs, - description=description, delayedstart=delayedstart) - if customOptionHandler: - customOptionHandler(*(opts,)) - print "Service updated" - except win32service.error, exc: - print "Error changing service configuration: %s (%d)" % (exc.strerror,exc.winerror) - err = exc.winerror - - elif arg=="remove": - knownArg = 1 - print "Removing service %s" % (serviceName) - try: - RemoveService(serviceName) - print "Service removed" - except win32service.error, exc: - print "Error removing service: %s (%d)" % (exc.strerror,exc.winerror) - err = exc.winerror - elif arg=="stop": - knownArg = 1 - print "Stopping service %s" % (serviceName) - try: - if waitSecs: - StopServiceWithDeps(serviceName, waitSecs = waitSecs) - else: - StopService(serviceName) - except win32service.error, exc: - print "Error stopping service: %s (%d)" % (exc.strerror,exc.winerror) - err = exc.winerror - if not knownArg: - err = -1 - print "Unknown command - '%s'" % arg - usage() - return err - -# -# Useful base class to build services from. 
-# -class ServiceFramework: - # Required Attributes: - # _svc_name_ = The service name - # _svc_display_name_ = The service display name - - # Optional Attributes: - _svc_deps_ = None # sequence of service names on which this depends - _exe_name_ = None # Default to PythonService.exe - _exe_args_ = None # Default to no arguments - _svc_description_ = None # Only exists on Windows 2000 or later, ignored on windows NT - - def __init__(self, args): - import servicemanager - self.ssh = servicemanager.RegisterServiceCtrlHandler(args[0], self.ServiceCtrlHandlerEx, True) - servicemanager.SetEventSourceName(self._svc_name_) - self.checkPoint = 0 - - def GetAcceptedControls(self): - # Setup the service controls we accept based on our attributes. Note - # that if you need to handle controls via SvcOther[Ex](), you must - # override this. - accepted = 0 - if hasattr(self, "SvcStop"): accepted = accepted | win32service.SERVICE_ACCEPT_STOP - if hasattr(self, "SvcPause") and hasattr(self, "SvcContinue"): - accepted = accepted | win32service.SERVICE_ACCEPT_PAUSE_CONTINUE - if hasattr(self, "SvcShutdown"): accepted = accepted | win32service.SERVICE_ACCEPT_SHUTDOWN - return accepted - - def ReportServiceStatus(self, serviceStatus, waitHint = 5000, win32ExitCode = 0, svcExitCode = 0): - if self.ssh is None: # Debugging! 
- return - if serviceStatus == win32service.SERVICE_START_PENDING: - accepted = 0 - else: - accepted = self.GetAcceptedControls() - - if serviceStatus in [win32service.SERVICE_RUNNING, win32service.SERVICE_STOPPED]: - checkPoint = 0 - else: - self.checkPoint = self.checkPoint + 1 - checkPoint = self.checkPoint - - # Now report the status to the control manager - status = (win32service.SERVICE_WIN32_OWN_PROCESS, - serviceStatus, - accepted, # dwControlsAccepted, - win32ExitCode, # dwWin32ExitCode; - svcExitCode, # dwServiceSpecificExitCode; - checkPoint, # dwCheckPoint; - waitHint) - win32service.SetServiceStatus( self.ssh, status) - - def SvcInterrogate(self): - # Assume we are running, and everyone is happy. - self.ReportServiceStatus(win32service.SERVICE_RUNNING) - - def SvcOther(self, control): - try: - print "Unknown control status - %d" % control - except IOError: - # services may not have a valid stdout! - pass - - def ServiceCtrlHandler(self, control): - return self.ServiceCtrlHandlerEx(control, 0, None) - - # The 'Ex' functions, which take additional params - def SvcOtherEx(self, control, event_type, data): - # The default here is to call self.SvcOther as that is the old behaviour. - # If you want to take advantage of the extra data, override this method - return self.SvcOther(control) - - def ServiceCtrlHandlerEx(self, control, event_type, data): - if control==win32service.SERVICE_CONTROL_STOP: - return self.SvcStop() - elif control==win32service.SERVICE_CONTROL_PAUSE: - return self.SvcPause() - elif control==win32service.SERVICE_CONTROL_CONTINUE: - return self.SvcContinue() - elif control==win32service.SERVICE_CONTROL_INTERROGATE: - return self.SvcInterrogate() - elif control==win32service.SERVICE_CONTROL_SHUTDOWN: - return self.SvcShutdown() - else: - return self.SvcOtherEx(control, event_type, data) - - def SvcRun(self): - self.ReportServiceStatus(win32service.SERVICE_RUNNING) - self.SvcDoRun() - # Once SvcDoRun terminates, the service has stopped. 
- # We tell the SCM the service is still stopping - the C framework - # will automatically tell the SCM it has stopped when this returns. - self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING) diff --git a/python/Lib/site-packages/win32/lib/win32timezone.py b/python/Lib/site-packages/win32/lib/win32timezone.py deleted file mode 100755 index e1f07b8aaa..0000000000 --- a/python/Lib/site-packages/win32/lib/win32timezone.py +++ /dev/null @@ -1,975 +0,0 @@ -# -*- coding: UTF-8 -*- - -""" -win32timezone: - Module for handling datetime.tzinfo time zones using the windows -registry for time zone information. The time zone names are dependent -on the registry entries defined by the operating system. - - This module may be tested using the doctest module. - - Written by Jason R. Coombs (jaraco@jaraco.com). - Copyright © 2003-2012. - All Rights Reserved. - - This module is licenced for use in Mark Hammond's pywin32 -library under the same terms as the pywin32 library. - - To use this time zone module with the datetime module, simply pass -the TimeZoneInfo object to the datetime constructor. For example, - ->>> import win32timezone, datetime ->>> assert 'Mountain Standard Time' in win32timezone.TimeZoneInfo.get_sorted_time_zone_names() ->>> MST = win32timezone.TimeZoneInfo('Mountain Standard Time') ->>> now = datetime.datetime.now(MST) - - The now object is now a time-zone aware object, and daylight savings- -aware methods may be called on it. 
- ->>> now.utcoffset() in (datetime.timedelta(-1, 61200), datetime.timedelta(-1, 64800)) -True - -(note that the result of utcoffset call will be different based on when now was -generated, unless standard time is always used) - ->>> now = datetime.datetime.now(TimeZoneInfo('Mountain Standard Time', True)) ->>> now.utcoffset() -datetime.timedelta(-1, 61200) - ->>> aug2 = datetime.datetime(2003, 8, 2, tzinfo = MST) ->>> tuple(aug2.utctimetuple()) -(2003, 8, 2, 6, 0, 0, 5, 214, 0) ->>> nov2 = datetime.datetime(2003, 11, 25, tzinfo = MST) ->>> tuple(nov2.utctimetuple()) -(2003, 11, 25, 7, 0, 0, 1, 329, 0) - -To convert from one timezone to another, just use the astimezone method. - ->>> aug2.isoformat() -'2003-08-02T00:00:00-06:00' ->>> aug2est = aug2.astimezone(win32timezone.TimeZoneInfo('Eastern Standard Time')) ->>> aug2est.isoformat() -'2003-08-02T02:00:00-04:00' - -calling the displayName member will return the display name as set in the -registry. - ->>> est = win32timezone.TimeZoneInfo('Eastern Standard Time') ->>> str(est.displayName) -'(UTC-05:00) Eastern Time (US & Canada)' - ->>> gmt = win32timezone.TimeZoneInfo('GMT Standard Time', True) ->>> str(gmt.displayName) -'(UTC) Dublin, Edinburgh, Lisbon, London' - -To get the complete list of available time zone keys, ->>> zones = win32timezone.TimeZoneInfo.get_all_time_zones() - -If you want to get them in an order that's sorted longitudinally ->>> zones = win32timezone.TimeZoneInfo.get_sorted_time_zones() - -TimeZoneInfo now supports being pickled and comparison ->>> import pickle ->>> tz = win32timezone.TimeZoneInfo('China Standard Time') ->>> tz == pickle.loads(pickle.dumps(tz)) -True - -It's possible to construct a TimeZoneInfo from a TimeZoneDescription -including the currently-defined zone. 
->>> tz = win32timezone.TimeZoneInfo(TimeZoneDefinition.current()) ->>> tz == pickle.loads(pickle.dumps(tz)) -True - ->>> aest = win32timezone.TimeZoneInfo('AUS Eastern Standard Time') ->>> est = win32timezone.TimeZoneInfo('E. Australia Standard Time') ->>> dt = datetime.datetime(2006, 11, 11, 1, 0, 0, tzinfo = aest) ->>> estdt = dt.astimezone(est) ->>> estdt.strftime('%Y-%m-%d %H:%M:%S') -'2006-11-11 00:00:00' - ->>> dt = datetime.datetime(2007, 1, 12, 1, 0, 0, tzinfo = aest) ->>> estdt = dt.astimezone(est) ->>> estdt.strftime('%Y-%m-%d %H:%M:%S') -'2007-01-12 00:00:00' - ->>> dt = datetime.datetime(2007, 6, 13, 1, 0, 0, tzinfo = aest) ->>> estdt = dt.astimezone(est) ->>> estdt.strftime('%Y-%m-%d %H:%M:%S') -'2007-06-13 01:00:00' - -Microsoft now has a patch for handling time zones in 2007 (see -http://support.microsoft.com/gp/cp_dst) - -As a result, patched systems will give an incorrect result for -dates prior to the designated year except for Vista and its -successors, which have dynamic time zone support. ->>> nov2_pre_change = datetime.datetime(2003, 11, 2, tzinfo = MST) ->>> old_response = (2003, 11, 2, 7, 0, 0, 6, 306, 0) ->>> incorrect_patch_response = (2003, 11, 2, 6, 0, 0, 6, 306, 0) ->>> pre_response = nov2_pre_change.utctimetuple() ->>> pre_response in (old_response, incorrect_patch_response) -True - -Furthermore, unpatched systems pre-Vista will give an incorrect -result for dates after 2007. ->>> nov2_post_change = datetime.datetime(2007, 11, 2, tzinfo = MST) ->>> incorrect_unpatched_response = (2007, 11, 2, 7, 0, 0, 4, 306, 0) ->>> new_response = (2007, 11, 2, 6, 0, 0, 4, 306, 0) ->>> post_response = nov2_post_change.utctimetuple() ->>> post_response in (new_response, incorrect_unpatched_response) -True - - -There is a function you can call to get some capabilities of the time -zone data. 
->>> caps = GetTZCapabilities() ->>> isinstance(caps, dict) -True ->>> 'MissingTZPatch' in caps -True ->>> 'DynamicTZSupport' in caps -True - ->>> both_dates_correct = (pre_response == old_response and post_response == new_response) ->>> old_dates_wrong = (pre_response == incorrect_patch_response) ->>> new_dates_wrong = (post_response == incorrect_unpatched_response) - ->>> caps['DynamicTZSupport'] == both_dates_correct -True - ->>> (not caps['DynamicTZSupport'] and caps['MissingTZPatch']) == new_dates_wrong -True - ->>> (not caps['DynamicTZSupport'] and not caps['MissingTZPatch']) == old_dates_wrong -True - -This test helps ensure language support for unicode characters ->>> x = TIME_ZONE_INFORMATION(0, u'français') - - -Test conversion from one time zone to another at a DST boundary -=============================================================== - ->>> tz_hi = TimeZoneInfo('Hawaiian Standard Time') ->>> tz_pac = TimeZoneInfo('Pacific Standard Time') ->>> time_before = datetime.datetime(2011, 11, 5, 15, 59, 59, tzinfo=tz_hi) ->>> tz_hi.utcoffset(time_before) -datetime.timedelta(-1, 50400) ->>> tz_hi.dst(time_before) -datetime.timedelta(0) - -Hawaii doesn't need dynamic TZ info ->>> getattr(tz_hi, 'dynamicInfo', None) - -Here's a time that gave some trouble as reported in #3523104 -because one minute later, the equivalent UTC time changes from DST -in the U.S. ->>> dt_hi = datetime.datetime(2011, 11, 5, 15, 59, 59, 0, tzinfo=tz_hi) ->>> dt_hi.timetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=5, tm_hour=15, tm_min=59, tm_sec=59, tm_wday=5, tm_yday=309, tm_isdst=0) ->>> dt_hi.utctimetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=6, tm_hour=1, tm_min=59, tm_sec=59, tm_wday=6, tm_yday=310, tm_isdst=0) - -Convert the time to pacific time. 
->>> dt_pac = dt_hi.astimezone(tz_pac) ->>> dt_pac.timetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=5, tm_hour=18, tm_min=59, tm_sec=59, tm_wday=5, tm_yday=309, tm_isdst=1) - -Notice that the UTC time is almost 2am. ->>> dt_pac.utctimetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=6, tm_hour=1, tm_min=59, tm_sec=59, tm_wday=6, tm_yday=310, tm_isdst=0) - -Now do the same tests one minute later in Hawaii. ->>> time_after = datetime.datetime(2011, 11, 5, 16, 0, 0, 0, tzinfo=tz_hi) ->>> tz_hi.utcoffset(time_after) -datetime.timedelta(-1, 50400) ->>> tz_hi.dst(time_before) -datetime.timedelta(0) - ->>> dt_hi = datetime.datetime(2011, 11, 5, 16, 0, 0, 0, tzinfo=tz_hi) ->>> print dt_hi.timetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=5, tm_hour=16, tm_min=0, tm_sec=0, tm_wday=5, tm_yday=309, tm_isdst=0) ->>> print dt_hi.utctimetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=6, tm_hour=2, tm_min=0, tm_sec=0, tm_wday=6, tm_yday=310, tm_isdst=0) - -According to the docs, this is what astimezone does. ->>> utc = (dt_hi - dt_hi.utcoffset()).replace(tzinfo=tz_pac) ->>> utc -datetime.datetime(2011, 11, 6, 2, 0, tzinfo=TimeZoneInfo('Pacific Standard Time')) ->>> tz_pac.fromutc(utc) == dt_hi.astimezone(tz_pac) -True ->>> tz_pac.fromutc(utc) -datetime.datetime(2011, 11, 5, 19, 0, tzinfo=TimeZoneInfo('Pacific Standard Time')) - -Make sure the converted time is correct. 
->>> dt_pac = dt_hi.astimezone(tz_pac) ->>> dt_pac.timetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=5, tm_hour=19, tm_min=0, tm_sec=0, tm_wday=5, tm_yday=309, tm_isdst=1) ->>> dt_pac.utctimetuple() -time.struct_time(tm_year=2011, tm_mon=11, tm_mday=6, tm_hour=2, tm_min=0, tm_sec=0, tm_wday=6, tm_yday=310, tm_isdst=0) - -Check some internal methods ->>> tz_pac._getStandardBias(datetime.datetime(2011, 1, 1)) -datetime.timedelta(0, 28800) ->>> tz_pac._getDaylightBias(datetime.datetime(2011, 1, 1)) -datetime.timedelta(0, 25200) - -Test the offsets ->>> offset = tz_pac.utcoffset(datetime.datetime(2011, 11, 6, 2, 0)) ->>> offset == datetime.timedelta(hours=-8) -True ->>> dst_offset = tz_pac.dst(datetime.datetime(2011, 11, 6, 2, 0) + offset) ->>> dst_offset == datetime.timedelta(hours=1) -True ->>> (offset + dst_offset) == datetime.timedelta(hours=-7) -True - - -Test offsets that occur right at the DST changeover ->>> datetime.datetime.utcfromtimestamp(1320570000).replace( -... tzinfo=TimeZoneInfo.utc()).astimezone(tz_pac) -datetime.datetime(2011, 11, 6, 1, 0, tzinfo=TimeZoneInfo('Pacific Standard Time')) - -""" -from __future__ import generators - -__author__ = 'Jason R. Coombs ' - -import _winreg -import struct -import datetime -import win32api -import re -import operator -import warnings -from itertools import count - -import logging -log = logging.getLogger(__file__) - -# A couple of objects for working with objects as if they were native C-type -# structures. -class _SimpleStruct(object): - _fields_ = None # must be overridden by subclasses - def __init__(self, *args, **kw): - for i, (name, typ) in enumerate(self._fields_): - def_arg = None - if i < len(args): - def_arg = args[i] - if name in kw: - def_arg = kw[name] - if def_arg is not None: - if not isinstance(def_arg, tuple): - def_arg = (def_arg,) - else: - def_arg = () - if len(def_arg)==1 and isinstance(def_arg[0], typ): - # already an object of this type. - # XXX - should copy.copy??? 
- def_val = def_arg[0] - else: - def_val = typ(*def_arg) - setattr(self, name, def_val) - - def field_names(self): - return [f[0] for f in self._fields_] - - def __eq__(self, other): - if not hasattr(other, "_fields_"): - return False - if self._fields_ != other._fields_: - return False - for name, _ in self._fields_: - if getattr(self, name) != getattr(other, name): - return False - return True - - def __ne__(self, other): - return not self.__eq__(other) - -class SYSTEMTIME(_SimpleStruct): - _fields_ = [ - ('year', int), - ('month', int), - ('day_of_week', int), - ('day', int), - ('hour', int), - ('minute', int), - ('second', int), - ('millisecond', int), - ] - -class TIME_ZONE_INFORMATION(_SimpleStruct): - _fields_ = [ - ('bias', int), - ('standard_name', unicode), - ('standard_start', SYSTEMTIME), - ('standard_bias', int), - ('daylight_name', unicode), - ('daylight_start', SYSTEMTIME), - ('daylight_bias', int), - ] - -class DYNAMIC_TIME_ZONE_INFORMATION(_SimpleStruct): - _fields_ = TIME_ZONE_INFORMATION._fields_ + [ - ('key_name', unicode), - ('dynamic_daylight_time_disabled', bool), - ] - - -class TimeZoneDefinition(DYNAMIC_TIME_ZONE_INFORMATION): - """ - A time zone definition class based on the win32 - DYNAMIC_TIME_ZONE_INFORMATION structure. - - Describes a bias against UTC (bias), and two dates at which a separate - additional bias applies (standard_bias and daylight_bias). 
- """ - - def __init__(self, *args, **kwargs): - """ - Try to construct a TimeZoneDefinition from - a) [DYNAMIC_]TIME_ZONE_INFORMATION args - b) another TimeZoneDefinition - c) a byte structure (using _from_bytes) - """ - try: - super(TimeZoneDefinition, self).__init__(*args, **kwargs) - return - except (TypeError, ValueError): - pass - - try: - self.__init_from_other(*args, **kwargs) - return - except TypeError: - pass - - try: - self.__init_from_bytes(*args, **kwargs) - return - except TypeError: - pass - - raise TypeError("Invalid arguments for %s" % self.__class__) - - def __init_from_bytes(self, bytes, standard_name='', daylight_name='', key_name='', daylight_disabled=False): - format = '3l8h8h' - components = struct.unpack(format, bytes) - bias, standard_bias, daylight_bias = components[:3] - standard_start = SYSTEMTIME(*components[3:11]) - daylight_start = SYSTEMTIME(*components[11:19]) - super(TimeZoneDefinition, self).__init__(bias, - standard_name, standard_start, standard_bias, - daylight_name, daylight_start, daylight_bias, - key_name, daylight_disabled,) - - def __init_from_other(self, other): - if not isinstance(other, TIME_ZONE_INFORMATION): - raise TypeError("Not a TIME_ZONE_INFORMATION") - for name in other.field_names(): - # explicitly get the value from the underlying structure - value = super(TimeZoneDefinition, other).__getattribute__(other, name) - setattr(self, name, value) - # consider instead of the loop above just copying the memory directly - #size = max(ctypes.sizeof(DYNAMIC_TIME_ZONE_INFO), ctypes.sizeof(other)) - #ctypes.memmove(ctypes.addressof(self), other, size) - - def __getattribute__(self, attr): - value = super(TimeZoneDefinition, self).__getattribute__(attr) - if 'bias' in attr: - make_minute_timedelta = lambda m: datetime.timedelta(minutes = m) - value = make_minute_timedelta(value) - return value - - @classmethod - def current(class_): - "Windows Platform SDK GetTimeZoneInformation" - code, tzi = 
win32api.GetTimeZoneInformation(True) - return code, class_(*tzi) - - def set(self): - tzi = tuple(getattr(self, n) for n, t in self._fields_) - win32api.SetTimeZoneInformation(tzi) - - def copy(self): - # XXX - this is no longer a copy! - return self.__class__(self) - - def locate_daylight_start(self, year): - return self._locate_day(year, self.daylight_start) - - def locate_standard_start(self, year): - return self._locate_day(year, self.standard_start) - - @staticmethod - def _locate_day(year, cutoff): - """ - Takes a SYSTEMTIME object, such as retrieved from a TIME_ZONE_INFORMATION - structure or call to GetTimeZoneInformation and interprets it based on the given - year to identify the actual day. - - This method is necessary because the SYSTEMTIME structure refers to a day by its - day of the week and week of the month (e.g. 4th saturday in March). - - >>> SATURDAY = 6 - >>> MARCH = 3 - >>> st = SYSTEMTIME(2000, MARCH, SATURDAY, 4, 0, 0, 0, 0) - - # according to my calendar, the 4th Saturday in March in 2009 was the 28th - >>> expected_date = datetime.datetime(2009, 3, 28) - >>> TimeZoneDefinition._locate_day(2009, st) == expected_date - True - """ - # MS stores Sunday as 0, Python datetime stores Monday as zero - target_weekday = (cutoff.day_of_week + 6) % 7 - # For SYSTEMTIMEs relating to time zone inforamtion, cutoff.day - # is the week of the month - week_of_month = cutoff.day - # so the following is the first day of that week - day = (week_of_month - 1) * 7 + 1 - result = datetime.datetime(year, cutoff.month, day, - cutoff.hour, cutoff.minute, cutoff.second, cutoff.millisecond) - # now the result is the correct week, but not necessarily the correct day of the week - days_to_go = (target_weekday - result.weekday()) % 7 - result += datetime.timedelta(days_to_go) - # if we selected a day in the month following the target month, - # move back a week or two. 
- # This is necessary because Microsoft defines the fifth week in a month - # to be the last week in a month and adding the time delta might have - # pushed the result into the next month. - while result.month == cutoff.month + 1: - result -= datetime.timedelta(weeks = 1) - return result - -class TimeZoneInfo(datetime.tzinfo): - """ - Main class for handling Windows time zones. - Usage: - TimeZoneInfo(

" - print "%s %s %10d %s" % (date_str, attr_string, info[5], rel_name) - total_size = total_size + info[5] - print " " * 14 + "%3d files, %10d bytes" % (len(files), total_size) - -def run(args): - """run program [args] - Starts the specified program on the remote device. - """ - prog_args = [] - for arg in args: - if " " in arg: - prog_args.append('"' + arg + '"') - else: - prog_args.append(arg) - prog_args = string.join(prog_args, " ") - wincerapi.CeCreateProcess(prog_args, "", None, None, 0, 0, None, "", None) - -def delete(args): - """delete file, ... - Delete one or more remote files - """ - for arg in args: - try: - wincerapi.CeDeleteFile(arg) - print "Deleted: %s" % arg - except win32api.error, details: - print_error(details, "Error deleting '%s'" % arg) - -def DumpCommands(): - print "%-10s - %s" % ("Command", "Description") - print "%-10s - %s" % ("-------", "-----------") - for name, item in globals().items(): - if type(item)==type(DumpCommands): - doc = getattr(item, "__doc__", "") - if doc: - lines = string.split(doc, "\n") - print "%-10s - %s" % (name, lines[0]) - for line in lines[1:]: - if line: - print " " * 8, line - -def main(): - if len(sys.argv)<2: - print "You must specify a command!" 
- DumpCommands() - return - command = sys.argv[1] - fn = globals().get(command) - if fn is None: - print "Unknown command:", command - DumpCommands() - return - - wincerapi.CeRapiInit() - try: - verinfo = wincerapi.CeGetVersionEx() - print "Connected to device, CE version %d.%d %s" % (verinfo[0], verinfo[1], verinfo[4]) - try: - fn(sys.argv[2:]) - except InvalidUsage, msg: - print "Invalid syntax -", msg - print fn.__doc__ - - finally: - try: - wincerapi.CeRapiUninit() - except win32api.error, details: - print_error(details, "Error disconnecting") - -if __name__=='__main__': - main() diff --git a/python/Lib/site-packages/win32/scripts/killProcName.py b/python/Lib/site-packages/win32/scripts/killProcName.py deleted file mode 100755 index 2f469e8ab9..0000000000 --- a/python/Lib/site-packages/win32/scripts/killProcName.py +++ /dev/null @@ -1,56 +0,0 @@ -# Kills a process by process name -# -# Uses the Performance Data Helper to locate the PID, then kills it. -# Will only kill the process if there is only one process of that name -# (eg, attempting to kill "Python.exe" will only work if there is only -# one Python.exe running. (Note that the current process does not -# count - ie, if Python.exe is hosting this script, you can still kill -# another Python.exe (as long as there is only one other Python.exe) - -# Really just a demo for the win32pdh(util) module, which allows you -# to get all sorts of information about a running process and many -# other aspects of your system. - -import win32api, win32pdhutil, win32con, sys - -def killProcName(procname): - # Change suggested by Dan Knierim, who found that this performed a - # "refresh", allowing us to kill processes created since this was run - # for the first time. - try: - win32pdhutil.GetPerformanceAttributes('Process','ID Process',procname) - except: - pass - - pids = win32pdhutil.FindPerformanceAttributesByName(procname) - - # If _my_ pid in there, remove it! 
- try: - pids.remove(win32api.GetCurrentProcessId()) - except ValueError: - pass - - if len(pids)==0: - result = "Can't find %s" % procname - elif len(pids)>1: - result = "Found too many %s's - pids=`%s`" % (procname,pids) - else: - handle = win32api.OpenProcess(win32con.PROCESS_TERMINATE, 0,pids[0]) - win32api.TerminateProcess(handle,0) - win32api.CloseHandle(handle) - result = "" - - return result - -if __name__ == '__main__': - if len(sys.argv)>1: - for procname in sys.argv[1:]: - result = killProcName(procname) - if result: - print result - print "Dumping all processes..." - win32pdhutil.ShowAllProcesses() - else: - print "Killed %s" % procname - else: - print "Usage: killProcName.py procname ..." diff --git a/python/Lib/site-packages/win32/scripts/rasutil.py b/python/Lib/site-packages/win32/scripts/rasutil.py deleted file mode 100755 index 294a3f6bf7..0000000000 --- a/python/Lib/site-packages/win32/scripts/rasutil.py +++ /dev/null @@ -1,87 +0,0 @@ -# A demo of using the RAS API from Python -import sys -import win32ras - -# The error raised if we can not -class ConnectionError(Exception): - pass - -def Connect(rasEntryName, numRetries = 5): - """Make a connection to the specified RAS entry. - - Returns a tuple of (bool, handle) on success. - - bool is 1 if a new connection was established, or 0 is a connection already existed. - - handle is a RAS HANDLE that can be passed to Disconnect() to end the connection. - - Raises a ConnectionError if the connection could not be established. - """ - assert numRetries > 0 - for info in win32ras.EnumConnections(): - if info[1].lower()==rasEntryName.lower(): - print "Already connected to", rasEntryName - return 0, info[0] - - dial_params, have_pw = win32ras.GetEntryDialParams(None, rasEntryName) - if not have_pw: - print "Error: The password is not saved for this connection" - print "Please connect manually selecting the 'save password' option and try again" - sys.exit(1) - - print "Connecting to", rasEntryName, "..." 
- retryCount = numRetries - while retryCount > 0: - rasHandle, errCode = win32ras.Dial(None, None, dial_params, None) - if win32ras.IsHandleValid(rasHandle): - bValid = 1 - break - print "Retrying..." - win32api.Sleep(5000) - retryCount = retryCount - 1 - - if errCode: - raise ConnectionError(errCode, win32ras.GetErrorString(errCode)) - return 1, rasHandle - -def Disconnect(handle): - if type(handle)==type(''): # have they passed a connection name? - for info in win32ras.EnumConnections(): - if info[1].lower()==handle.lower(): - handle = info[0] - break - else: - raise ConnectionError(0, "Not connected to entry '%s'" % handle) - - win32ras.HangUp(handle) - -usage="""rasutil.py - Utilities for using RAS - -Usage: - rasutil [-r retryCount] [-c rasname] [-d rasname] - - -r retryCount - Number of times to retry the RAS connection - -c rasname - Connect to the phonebook entry specified by rasname - -d rasname - Disconnect from the phonebook entry specified by rasname -""" - -def Usage(why): - print why - print usage - sys.exit(1) - -if __name__=='__main__': - import getopt - try: - opts, args = getopt.getopt(sys.argv[1:], "r:c:d:") - except getopt.error, why: - Usage(why) - retries = 5 - if len(args) != 0: - Usage("Invalid argument") - - for opt, val in opts: - if opt=='-c': - Connect(val, retries) - if opt=='-d': - Disconnect(val) - if opt=='-r': - retries = int(val) diff --git a/python/Lib/site-packages/win32/scripts/regsetup.py b/python/Lib/site-packages/win32/scripts/regsetup.py deleted file mode 100755 index 71e66fdd8d..0000000000 --- a/python/Lib/site-packages/win32/scripts/regsetup.py +++ /dev/null @@ -1,519 +0,0 @@ -# A tool to setup the Python registry. - -class error(Exception): - pass - -import sys # at least we can count on this! - -def FileExists(fname): - """Check if a file exists. Returns true or false. 
- """ - import os - try: - os.stat(fname) - return 1 - except os.error, details: - return 0 - -def IsPackageDir(path, packageName, knownFileName): - """Given a path, a ni package name, and possibly a known file name in - the root of the package, see if this path is good. - """ - import os - if knownFileName is None: - knownFileName = "." - return FileExists(os.path.join(os.path.join(path, packageName),knownFileName)) - -def IsDebug(): - """Return "_d" if we're running a debug version. - - This is to be used within DLL names when locating them. - """ - import imp - for suffix_item in imp.get_suffixes(): - if suffix_item[0]=='_d.pyd': - return '_d' - return '' - -def FindPackagePath(packageName, knownFileName, searchPaths): - """Find a package. - - Given a ni style package name, check the package is registered. - - First place looked is the registry for an existing entry. Then - the searchPaths are searched. - """ - import regutil, os - pathLook = regutil.GetRegisteredNamedPath(packageName) - if pathLook and IsPackageDir(pathLook, packageName, knownFileName): - return pathLook, None # The currently registered one is good. - # Search down the search paths. - for pathLook in searchPaths: - if IsPackageDir(pathLook, packageName, knownFileName): - # Found it - ret = os.path.abspath(pathLook) - return ret, ret - raise error("The package %s can not be located" % packageName) - -def FindHelpPath(helpFile, helpDesc, searchPaths): - # See if the current registry entry is OK - import os, win32api, win32con - try: - key = win32api.RegOpenKey(win32con.HKEY_LOCAL_MACHINE, "Software\\Microsoft\\Windows\\Help", 0, win32con.KEY_ALL_ACCESS) - try: - try: - path = win32api.RegQueryValueEx(key, helpDesc)[0] - if FileExists(os.path.join(path, helpFile)): - return os.path.abspath(path) - except win32api.error: - pass # no registry entry. 
- finally: - key.Close() - except win32api.error: - pass - for pathLook in searchPaths: - if FileExists(os.path.join(pathLook, helpFile)): - return os.path.abspath(pathLook) - pathLook = os.path.join(pathLook, "Help") - if FileExists(os.path.join( pathLook, helpFile)): - return os.path.abspath(pathLook) - raise error("The help file %s can not be located" % helpFile) - -def FindAppPath(appName, knownFileName, searchPaths): - """Find an application. - - First place looked is the registry for an existing entry. Then - the searchPaths are searched. - """ - # Look in the first path. - import regutil, string, os - regPath = regutil.GetRegisteredNamedPath(appName) - if regPath: - pathLook = regPath.split(";")[0] - if regPath and FileExists(os.path.join(pathLook, knownFileName)): - return None # The currently registered one is good. - # Search down the search paths. - for pathLook in searchPaths: - if FileExists(os.path.join(pathLook, knownFileName)): - # Found it - return os.path.abspath(pathLook) - raise error("The file %s can not be located for application %s" % (knownFileName, appName)) - -def FindPythonExe(exeAlias, possibleRealNames, searchPaths): - """Find an exe. - - Returns the full path to the .exe, and a boolean indicating if the current - registered entry is OK. We don't trust the already registered version even - if it exists - it may be wrong (ie, for a different Python version) - """ - import win32api, regutil, string, os, sys - if possibleRealNames is None: - possibleRealNames = exeAlias - # Look first in Python's home. 
- found = os.path.join(sys.prefix, possibleRealNames) - if not FileExists(found): # for developers - if "64 bit" in sys.version: - found = os.path.join(sys.prefix, "PCBuild", "amd64", possibleRealNames) - else: - found = os.path.join(sys.prefix, "PCBuild", possibleRealNames) - if not FileExists(found): - found = LocateFileName(possibleRealNames, searchPaths) - - registered_ok = 0 - try: - registered = win32api.RegQueryValue(regutil.GetRootKey(), regutil.GetAppPathsKey() + "\\" + exeAlias) - registered_ok = found==registered - except win32api.error: - pass - return found, registered_ok - -def QuotedFileName(fname): - """Given a filename, return a quoted version if necessary - """ - import regutil, string - try: - fname.index(" ") # Other chars forcing quote? - return '"%s"' % fname - except ValueError: - # No space in name. - return fname - -def LocateFileName(fileNamesString, searchPaths): - """Locate a file name, anywhere on the search path. - - If the file can not be located, prompt the user to find it for us - (using a common OpenFile dialog) - - Raises KeyboardInterrupt if the user cancels. - """ - import regutil, string, os - fileNames = fileNamesString.split(";") - for path in searchPaths: - for fileName in fileNames: - try: - retPath = os.path.join(path, fileName) - os.stat(retPath) - break - except os.error: - retPath = None - if retPath: - break - else: - fileName = fileNames[0] - try: - import win32ui, win32con - except ImportError: - raise error("Need to locate the file %s, but the win32ui module is not available\nPlease run the program again, passing as a parameter the path to this file." % fileName) - # Display a common dialog to locate the file. 
- flags=win32con.OFN_FILEMUSTEXIST - ext = os.path.splitext(fileName)[1] - filter = "Files of requested type (*%s)|*%s||" % (ext,ext) - dlg = win32ui.CreateFileDialog(1,None,fileName,flags,filter,None) - dlg.SetOFNTitle("Locate " + fileName) - if dlg.DoModal() != win32con.IDOK: - raise KeyboardInterrupt("User cancelled the process") - retPath = dlg.GetPathName() - return os.path.abspath(retPath) - -def LocatePath(fileName, searchPaths): - """Like LocateFileName, but returns a directory only. - """ - import os - return os.path.abspath(os.path.split(LocateFileName(fileName, searchPaths))[0]) - -def LocateOptionalPath(fileName, searchPaths): - """Like LocatePath, but returns None if the user cancels. - """ - try: - return LocatePath(fileName, searchPaths) - except KeyboardInterrupt: - return None - - -def LocateOptionalFileName(fileName, searchPaths = None): - """Like LocateFileName, but returns None if the user cancels. - """ - try: - return LocateFileName(fileName, searchPaths) - except KeyboardInterrupt: - return None - -def LocatePythonCore(searchPaths): - """Locate and validate the core Python directories. Returns a list - of paths that should be used as the core (ie, un-named) portion of - the Python path. 
- """ - import os, regutil - currentPath = regutil.GetRegisteredNamedPath(None) - if currentPath: - presearchPaths = currentPath.split(";") - else: - presearchPaths = [os.path.abspath(".")] - libPath = None - for path in presearchPaths: - if FileExists(os.path.join(path, "os.py")): - libPath = path - break - if libPath is None and searchPaths is not None: - libPath = LocatePath("os.py", searchPaths) - if libPath is None: - raise error("The core Python library could not be located.") - - corePath = None - suffix = IsDebug() - for path in presearchPaths: - if FileExists(os.path.join(path, "unicodedata%s.pyd" % suffix)): - corePath = path - break - if corePath is None and searchPaths is not None: - corePath = LocatePath("unicodedata%s.pyd" % suffix, searchPaths) - if corePath is None: - raise error("The core Python path could not be located.") - - installPath = os.path.abspath(os.path.join(libPath, "..")) - return installPath, [libPath, corePath] - -def FindRegisterPackage(packageName, knownFile, searchPaths, registryAppName = None): - """Find and Register a package. - - Assumes the core registry setup correctly. - - In addition, if the location located by the package is already - in the **core** path, then an entry is registered, but no path. - (no other paths are checked, as the application whose path was used - may later be uninstalled. 
This should not happen with the core) - """ - import regutil, string - if not packageName: raise error("A package name must be supplied") - corePaths = regutil.GetRegisteredNamedPath(None).split(";") - if not searchPaths: searchPaths = corePaths - registryAppName = registryAppName or packageName - try: - pathLook, pathAdd = FindPackagePath(packageName, knownFile, searchPaths) - if pathAdd is not None: - if pathAdd in corePaths: - pathAdd = "" - regutil.RegisterNamedPath(registryAppName, pathAdd) - return pathLook - except error, details: - print "*** The %s package could not be registered - %s" % (packageName, details) - print "*** Please ensure you have passed the correct paths on the command line." - print "*** - For packages, you should pass a path to the packages parent directory," - print "*** - and not the package directory itself..." - - -def FindRegisterApp(appName, knownFiles, searchPaths): - """Find and Register a package. - - Assumes the core registry setup correctly. - - """ - import regutil, string - if type(knownFiles)==type(''): - knownFiles = [knownFiles] - paths=[] - try: - for knownFile in knownFiles: - pathLook = FindAppPath(appName, knownFile, searchPaths) - if pathLook: - paths.append(pathLook) - except error, details: - print "*** ", details - return - - regutil.RegisterNamedPath(appName, ";".join(paths)) - -def FindRegisterPythonExe(exeAlias, searchPaths, actualFileNames = None): - """Find and Register a Python exe (not necessarily *the* python.exe) - - Assumes the core registry setup correctly. 
- """ - import regutil, string - fname, ok = FindPythonExe(exeAlias, actualFileNames, searchPaths) - if not ok: - regutil.RegisterPythonExe(fname, exeAlias) - return fname - - -def FindRegisterHelpFile(helpFile, searchPaths, helpDesc = None ): - import regutil - - try: - pathLook = FindHelpPath(helpFile, helpDesc, searchPaths) - except error, details: - print "*** ", details - return -# print "%s found at %s" % (helpFile, pathLook) - regutil.RegisterHelpFile(helpFile, pathLook, helpDesc) - -def SetupCore(searchPaths): - """Setup the core Python information in the registry. - - This function makes no assumptions about the current state of sys.path. - - After this function has completed, you should have access to the standard - Python library, and the standard Win32 extensions - """ - - import sys - for path in searchPaths: - sys.path.append(path) - - import os - import regutil, win32api,win32con - - installPath, corePaths = LocatePythonCore(searchPaths) - # Register the core Pythonpath. - print corePaths - regutil.RegisterNamedPath(None, ';'.join(corePaths)) - - # Register the install path. - hKey = win32api.RegCreateKey(regutil.GetRootKey() , regutil.BuildDefaultPythonKey()) - try: - # Core Paths. - win32api.RegSetValue(hKey, "InstallPath", win32con.REG_SZ, installPath) - finally: - win32api.RegCloseKey(hKey) - - # Register the win32 core paths. - win32paths = os.path.abspath( os.path.split(win32api.__file__)[0]) + ";" + \ - os.path.abspath( os.path.split(LocateFileName("win32con.py;win32con.pyc", sys.path ) )[0] ) - - # Python has builtin support for finding a "DLLs" directory, but - # not a PCBuild. 
Having it in the core paths means it is ignored when - # an EXE not in the Python dir is hosting us - so we add it as a named - # value - check = os.path.join(sys.prefix, "PCBuild") - if "64 bit" in sys.version: - check = os.path.join(check, "amd64") - if os.path.isdir(check): - regutil.RegisterNamedPath("PCBuild",check) - -def RegisterShellInfo(searchPaths): - """Registers key parts of the Python installation with the Windows Shell. - - Assumes a valid, minimal Python installation exists - (ie, SetupCore() has been previously successfully run) - """ - import regutil, win32con - suffix = IsDebug() - # Set up a pointer to the .exe's - exePath = FindRegisterPythonExe("Python%s.exe" % suffix, searchPaths) - regutil.SetRegistryDefaultValue(".py", "Python.File", win32con.HKEY_CLASSES_ROOT) - regutil.RegisterShellCommand("Open", QuotedFileName(exePath)+" \"%1\" %*", "&Run") - regutil.SetRegistryDefaultValue("Python.File\\DefaultIcon", "%s,0" % exePath, win32con.HKEY_CLASSES_ROOT) - - FindRegisterHelpFile("Python.hlp", searchPaths, "Main Python Documentation") - FindRegisterHelpFile("ActivePython.chm", searchPaths, "Main Python Documentation") - - # We consider the win32 core, as it contains all the win32 api type - # stuff we need. -# FindRegisterApp("win32", ["win32con.pyc", "win32api%s.pyd" % suffix], searchPaths) - -usage = """\ -regsetup.py - Setup/maintain the registry for Python apps. - -Run without options, (but possibly search paths) to repair a totally broken -python registry setup. This should allow other options to work. - -Usage: %s [options ...] paths ... --p packageName -- Find and register a package. Looks in the paths for - a sub-directory with the name of the package, and - adds a path entry for the package. --a appName -- Unconditionally add an application name to the path. - A new path entry is create with the app name, and the - paths specified are added to the registry. --c -- Add the specified paths to the core Pythonpath. 
- If a path appears on the core path, and a package also - needs that same path, the package will not bother - registering it. Therefore, By adding paths to the - core path, you can avoid packages re-registering the same path. --m filename -- Find and register the specific file name as a module. - Do not include a path on the filename! ---shell -- Register everything with the Win95/NT shell. ---upackage name -- Unregister the package ---uapp name -- Unregister the app (identical to --upackage) ---umodule name -- Unregister the module - ---description -- Print a description of the usage. ---examples -- Print examples of usage. -""" % sys.argv[0] - -description="""\ -If no options are processed, the program attempts to validate and set -the standard Python path to the point where the standard library is -available. This can be handy if you move Python to a new drive/sub-directory, -in which case most of the options would fail (as they need at least string.py, -os.py etc to function.) -Running without options should repair Python well enough to run with -the other options. - -paths are search paths that the program will use to seek out a file. -For example, when registering the core Python, you may wish to -provide paths to non-standard places to look for the Python help files, -library files, etc. - -See also the "regcheck.py" utility which will check and dump the contents -of the registry. -""" - -examples="""\ -Examples: -"regsetup c:\\wierd\\spot\\1 c:\\wierd\\spot\\2" -Attempts to setup the core Python. Looks in some standard places, -as well as the 2 wierd spots to locate the core Python files (eg, Python.exe, -python14.dll, the standard library and Win32 Extensions. - -"regsetup -a myappname . 
.\subdir" -Registers a new Pythonpath entry named myappname, with "C:\\I\\AM\\HERE" and -"C:\\I\\AM\\HERE\subdir" added to the path (ie, all args are converted to -absolute paths) - -"regsetup -c c:\\my\\python\\files" -Unconditionally add "c:\\my\\python\\files" to the 'core' Python path. - -"regsetup -m some.pyd \\windows\\system" -Register the module some.pyd in \\windows\\system as a registered -module. This will allow some.pyd to be imported, even though the -windows system directory is not (usually!) on the Python Path. - -"regsetup --umodule some" -Unregister the module "some". This means normal import rules then apply -for that module. -""" - -if __name__=='__main__': - if len(sys.argv)>1 and sys.argv[1] in ['/?','-?','-help','-h']: - print usage - elif len(sys.argv)==1 or not sys.argv[1][0] in ['/','-']: - # No args, or useful args. - searchPath = sys.path[:] - for arg in sys.argv[1:]: - searchPath.append(arg) - # Good chance we are being run from the "regsetup.py" directory. - # Typically this will be "\somewhere\win32\Scripts" and the - # "somewhere" and "..\Lib" should also be searched. - searchPath.append("..\\Build") - searchPath.append("..\\Lib") - searchPath.append("..") - searchPath.append("..\\..") - - # for developers: - # also search somewhere\lib, ..\build, and ..\..\build - searchPath.append("..\\..\\lib") - searchPath.append("..\\build") - if "64 bit" in sys.version: - searchPath.append("..\\..\\pcbuild\\amd64") - else: - searchPath.append("..\\..\\pcbuild") - - print "Attempting to setup/repair the Python core" - - SetupCore(searchPath) - RegisterShellInfo(searchPath) - FindRegisterHelpFile("PyWin32.chm", searchPath, "Pythonwin Reference") - # Check the registry. - print "Registration complete - checking the registry..." 
- import regcheck - regcheck.CheckRegistry() - else: - searchPaths = [] - import getopt, string - opts, args = getopt.getopt(sys.argv[1:], 'p:a:m:c', - ['shell','upackage=','uapp=','umodule=','description','examples']) - for arg in args: - searchPaths.append(arg) - for o,a in opts: - if o=='--description': - print description - if o=='--examples': - print examples - if o=='--shell': - print "Registering the Python core." - RegisterShellInfo(searchPaths) - if o=='-p': - print "Registering package", a - FindRegisterPackage(a,None,searchPaths) - if o in ['--upackage', '--uapp']: - import regutil - print "Unregistering application/package", a - regutil.UnregisterNamedPath(a) - if o=='-a': - import regutil - path = ";".join(searchPaths) - print "Registering application", a,"to path",path - regutil.RegisterNamedPath(a,path) - if o=='-c': - if not len(searchPaths): - raise error("-c option must provide at least one additional path") - import win32api, regutil - currentPaths = regutil.GetRegisteredNamedPath(None).split(";") - oldLen = len(currentPaths) - for newPath in searchPaths: - if newPath not in currentPaths: - currentPaths.append(newPath) - if len(currentPaths)!=oldLen: - print "Registering %d new core paths" % (len(currentPaths)-oldLen) - regutil.RegisterNamedPath(None,";".join(currentPaths)) - else: - print "All specified paths are already registered." diff --git a/python/Lib/site-packages/win32/scripts/setup_d.py b/python/Lib/site-packages/win32/scripts/setup_d.py deleted file mode 100755 index bc704b9322..0000000000 --- a/python/Lib/site-packages/win32/scripts/setup_d.py +++ /dev/null @@ -1,91 +0,0 @@ -# Install and register pythonxx_d.dll, pywintypesxx_d.dll and pythoncomxx_d.dll -# -# Assumes the _d files can be found in the same directory as this script -# or in the cwd. 
- -import win32api -import _winreg -import sys -import shutil -import os - -def usage_and_die(rc): - print - print "This script is designed to copy and register the Python debug" - print "binaries. It looks for pythonxx_d.dll, pythoncomxx_d.dll etc," - print "and installs them to work correctly with Python debug builds." - print - print "You will generally find this script in the. zip file that" - print "included these _d files. Please run this script from" - print "that directory" - sys.exit(rc) - -if win32api.__file__.find("_d") > 0: - print "This scripts appears to be running a DEBUG version of Python." - print "Please run it using a normal release build (python.exe)" - usage_and_die(1) - -try: - import pythoncom -except ImportError, details: - print "Could not import the release version of pythoncom" - print "The error details are: %s" % (details,) - print "Please correct this error and rerun the script" - usage_and_die(2) - -try: - import pywintypes -except ImportError, details: - print "Could not import the release version of pywintypes" - print "The error details are: %s" % (details,) - print "Please correct this error and rerun the script" - usage_and_die(2) - -def _docopy(src, dest): - orig_src = src - if not os.path.isfile(src): - src = os.path.join( os.path.split(sys.argv[0])[0], src) - print "Can not find %s or %s to copy" % (os.path.abspath(orig_src), os.path.abspath(src)) - return 0 - try: - shutil.copy(src, dest) - print "Copied %s -> %s" % (src, dest) - return 1 - except: - print "Error copying '%s' -> '%s'" % (src, dest) - print str(sys.exc_info[1]) - usage_and_die(3) - -def _doregister(mod_name, dll_name): - assert os.path.isfile(dll_name), "Shouldn't get here if the file doesn't exist!" 
- try: - key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, "Software\\Python\\PythonCore\\%s\\Modules\\%s" % (sys.winver, mod_name)) - except _winreg.error: - try: - key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, "Software\\Python\\PythonCore\\%s\\Modules\\%s" % (sys.winver, mod_name)) - except _winreg.error: - print "Could not find the existing '%s' module registered in the registry" % (mod_name,) - usage_and_die(4) - # Create the debug key. - sub_key = _winreg.CreateKey(key, "Debug") - _winreg.SetValue(sub_key, None, _winreg.REG_SZ, dll_name) - print "Registered '%s' in the registry" % (dll_name,) - -def _domodule(mod_name, release_mod_filename): - path, fname = os.path.split(release_mod_filename) - base, ext = os.path.splitext(fname) - new_fname = base + "_d" + ext - if _docopy(new_fname, path): - _doregister( mod_name, os.path.abspath( os.path.join(path, new_fname) ) ) - - -# First the main Python DLL. -path, fname = path, fname = os.path.split(win32api.GetModuleFileName(sys.dllhandle)) -base, ext = os.path.splitext(fname) -_docopy(base + "_d" + ext, path) - -# Then pythoncom and pywintypes. -_domodule("pythoncom", pythoncom.__file__) -_domodule("pywintypes", pywintypes.__file__) - -print "System _d files were setup." 
diff --git a/python/Lib/site-packages/win32/servicemanager.pyd b/python/Lib/site-packages/win32/servicemanager.pyd deleted file mode 100755 index 01c62f9c8d..0000000000 Binary files a/python/Lib/site-packages/win32/servicemanager.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/timer.pyd b/python/Lib/site-packages/win32/timer.pyd deleted file mode 100755 index cc5d13371e..0000000000 Binary files a/python/Lib/site-packages/win32/timer.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win2kras.pyd b/python/Lib/site-packages/win32/win2kras.pyd deleted file mode 100755 index efa3244d91..0000000000 Binary files a/python/Lib/site-packages/win32/win2kras.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32api.pyd b/python/Lib/site-packages/win32/win32api.pyd deleted file mode 100755 index e8d106d194..0000000000 Binary files a/python/Lib/site-packages/win32/win32api.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32clipboard.pyd b/python/Lib/site-packages/win32/win32clipboard.pyd deleted file mode 100755 index c39f2769c4..0000000000 Binary files a/python/Lib/site-packages/win32/win32clipboard.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32console.pyd b/python/Lib/site-packages/win32/win32console.pyd deleted file mode 100755 index 3f1255089a..0000000000 Binary files a/python/Lib/site-packages/win32/win32console.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32cred.pyd b/python/Lib/site-packages/win32/win32cred.pyd deleted file mode 100755 index 8624f3216d..0000000000 Binary files a/python/Lib/site-packages/win32/win32cred.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32crypt.pyd b/python/Lib/site-packages/win32/win32crypt.pyd deleted file mode 100755 index dfb3402a24..0000000000 Binary files a/python/Lib/site-packages/win32/win32crypt.pyd and /dev/null differ diff --git 
a/python/Lib/site-packages/win32/win32event.pyd b/python/Lib/site-packages/win32/win32event.pyd deleted file mode 100755 index b723054a37..0000000000 Binary files a/python/Lib/site-packages/win32/win32event.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32evtlog.pyd b/python/Lib/site-packages/win32/win32evtlog.pyd deleted file mode 100755 index 538a0c74cf..0000000000 Binary files a/python/Lib/site-packages/win32/win32evtlog.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32file.pyd b/python/Lib/site-packages/win32/win32file.pyd deleted file mode 100755 index 2b5ac35137..0000000000 Binary files a/python/Lib/site-packages/win32/win32file.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32gui.pyd b/python/Lib/site-packages/win32/win32gui.pyd deleted file mode 100755 index 5dd7ace188..0000000000 Binary files a/python/Lib/site-packages/win32/win32gui.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32help.pyd b/python/Lib/site-packages/win32/win32help.pyd deleted file mode 100755 index 4bc7ba5439..0000000000 Binary files a/python/Lib/site-packages/win32/win32help.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32inet.pyd b/python/Lib/site-packages/win32/win32inet.pyd deleted file mode 100755 index cce6c2bace..0000000000 Binary files a/python/Lib/site-packages/win32/win32inet.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32job.pyd b/python/Lib/site-packages/win32/win32job.pyd deleted file mode 100755 index cb547b14c2..0000000000 Binary files a/python/Lib/site-packages/win32/win32job.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32lz.pyd b/python/Lib/site-packages/win32/win32lz.pyd deleted file mode 100755 index 2c42a164a2..0000000000 Binary files a/python/Lib/site-packages/win32/win32lz.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32net.pyd 
b/python/Lib/site-packages/win32/win32net.pyd deleted file mode 100755 index a5a56a1eaf..0000000000 Binary files a/python/Lib/site-packages/win32/win32net.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32pdh.pyd b/python/Lib/site-packages/win32/win32pdh.pyd deleted file mode 100755 index 0ac3b3e087..0000000000 Binary files a/python/Lib/site-packages/win32/win32pdh.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32pipe.pyd b/python/Lib/site-packages/win32/win32pipe.pyd deleted file mode 100755 index 2946638068..0000000000 Binary files a/python/Lib/site-packages/win32/win32pipe.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32print.pyd b/python/Lib/site-packages/win32/win32print.pyd deleted file mode 100755 index 9a733d5bc0..0000000000 Binary files a/python/Lib/site-packages/win32/win32print.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32process.pyd b/python/Lib/site-packages/win32/win32process.pyd deleted file mode 100755 index 1434f4b75e..0000000000 Binary files a/python/Lib/site-packages/win32/win32process.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32profile.pyd b/python/Lib/site-packages/win32/win32profile.pyd deleted file mode 100755 index 46c43c61c9..0000000000 Binary files a/python/Lib/site-packages/win32/win32profile.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32ras.pyd b/python/Lib/site-packages/win32/win32ras.pyd deleted file mode 100755 index 8675d32d41..0000000000 Binary files a/python/Lib/site-packages/win32/win32ras.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32security.pyd b/python/Lib/site-packages/win32/win32security.pyd deleted file mode 100755 index 9cf9daaf47..0000000000 Binary files a/python/Lib/site-packages/win32/win32security.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32service.pyd b/python/Lib/site-packages/win32/win32service.pyd 
deleted file mode 100755 index b0c1067626..0000000000 Binary files a/python/Lib/site-packages/win32/win32service.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32trace.pyd b/python/Lib/site-packages/win32/win32trace.pyd deleted file mode 100755 index c666cb7c6f..0000000000 Binary files a/python/Lib/site-packages/win32/win32trace.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32transaction.pyd b/python/Lib/site-packages/win32/win32transaction.pyd deleted file mode 100755 index 95327a48b3..0000000000 Binary files a/python/Lib/site-packages/win32/win32transaction.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32ts.pyd b/python/Lib/site-packages/win32/win32ts.pyd deleted file mode 100755 index 917882b644..0000000000 Binary files a/python/Lib/site-packages/win32/win32ts.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/win32wnet.pyd b/python/Lib/site-packages/win32/win32wnet.pyd deleted file mode 100755 index 787254e466..0000000000 Binary files a/python/Lib/site-packages/win32/win32wnet.pyd and /dev/null differ diff --git a/python/Lib/site-packages/win32/winxpgui.pyd b/python/Lib/site-packages/win32/winxpgui.pyd deleted file mode 100755 index e25f94ff9f..0000000000 Binary files a/python/Lib/site-packages/win32/winxpgui.pyd and /dev/null differ diff --git a/python/Lib/site.py b/python/Lib/site.py deleted file mode 100755 index f812c3a34a..0000000000 --- a/python/Lib/site.py +++ /dev/null @@ -1,591 +0,0 @@ -"""Append module search paths for third-party packages to sys.path. - -**************************************************************** -* This module is automatically imported during initialization. * -**************************************************************** - -In earlier versions of Python (up to 1.5a3), scripts or modules that -needed to use site-specific modules would place ``import site'' -somewhere near the top of their code. 
Because of the automatic -import, this is no longer necessary (but code that does it still -works). - -This will append site-specific paths to the module search path. On -Unix (including Mac OSX), it starts with sys.prefix and -sys.exec_prefix (if different) and appends -lib/python/site-packages as well as lib/site-python. -On other platforms (such as Windows), it tries each of the -prefixes directly, as well as with lib/site-packages appended. The -resulting directories, if they exist, are appended to sys.path, and -also inspected for path configuration files. - -A path configuration file is a file whose name has the form -.pth; its contents are additional directories (one per line) -to be added to sys.path. Non-existing directories (or -non-directories) are never added to sys.path; no directory is added to -sys.path more than once. Blank lines and lines beginning with -'#' are skipped. Lines starting with 'import' are executed. - -For example, suppose sys.prefix and sys.exec_prefix are set to -/usr/local and there is a directory /usr/local/lib/python2.5/site-packages -with three subdirectories, foo, bar and spam, and two path -configuration files, foo.pth and bar.pth. Assume foo.pth contains the -following: - - # foo package configuration - foo - bar - bletch - -and bar.pth contains: - - # bar package configuration - bar - -Then the following directories are added to sys.path, in this order: - - /usr/local/lib/python2.5/site-packages/bar - /usr/local/lib/python2.5/site-packages/foo - -Note that bletch is omitted because it doesn't exist; bar precedes foo -because bar.pth comes alphabetically before foo.pth; and spam is -omitted because it is not mentioned in either path configuration file. - -After these path manipulations, an attempt is made to import a module -named sitecustomize, which can perform arbitrary additional -site-specific customizations. If this import fails with an -ImportError exception, it is silently ignored. 
- -""" - -import sys -import os -import __builtin__ -import traceback - -# Prefixes for site-packages; add additional prefixes like /usr/local here -PREFIXES = [sys.prefix, sys.exec_prefix] -# Enable per user site-packages directory -# set it to False to disable the feature or True to force the feature -ENABLE_USER_SITE = None - -# for distutils.commands.install -# These values are initialized by the getuserbase() and getusersitepackages() -# functions, through the main() function when Python starts. -USER_SITE = None -USER_BASE = None - - -def makepath(*paths): - dir = os.path.join(*paths) - try: - dir = os.path.abspath(dir) - except OSError: - pass - return dir, os.path.normcase(dir) - - -def abs__file__(): - """Set all module' __file__ attribute to an absolute path""" - for m in sys.modules.values(): - if hasattr(m, '__loader__'): - continue # don't mess with a PEP 302-supplied __file__ - try: - m.__file__ = os.path.abspath(m.__file__) - except (AttributeError, OSError): - pass - - -def removeduppaths(): - """ Remove duplicate entries from sys.path along with making them - absolute""" - # This ensures that the initial path provided by the interpreter contains - # only absolute pathnames, even if we're running from the build directory. - L = [] - known_paths = set() - for dir in sys.path: - # Filter out duplicate paths (on case-insensitive file systems also - # if they only differ in case); turn relative paths into absolute - # paths. 
- dir, dircase = makepath(dir) - if not dircase in known_paths: - L.append(dir) - known_paths.add(dircase) - sys.path[:] = L - return known_paths - - -def _init_pathinfo(): - """Return a set containing all existing directory entries from sys.path""" - d = set() - for dir in sys.path: - try: - if os.path.isdir(dir): - dir, dircase = makepath(dir) - d.add(dircase) - except TypeError: - continue - return d - - -def addpackage(sitedir, name, known_paths): - """Process a .pth file within the site-packages directory: - For each line in the file, either combine it with sitedir to a path - and add that to known_paths, or execute it if it starts with 'import '. - """ - if known_paths is None: - _init_pathinfo() - reset = 1 - else: - reset = 0 - fullname = os.path.join(sitedir, name) - try: - f = open(fullname, "rU") - except IOError: - return - with f: - for n, line in enumerate(f): - if line.startswith("#"): - continue - try: - if line.startswith(("import ", "import\t")): - exec line - continue - line = line.rstrip() - dir, dircase = makepath(sitedir, line) - if not dircase in known_paths and os.path.exists(dir): - sys.path.append(dir) - known_paths.add(dircase) - except Exception as err: - print >>sys.stderr, "Error processing line {:d} of {}:\n".format( - n+1, fullname) - for record in traceback.format_exception(*sys.exc_info()): - for line in record.splitlines(): - print >>sys.stderr, ' '+line - print >>sys.stderr, "\nRemainder of file ignored" - break - if reset: - known_paths = None - return known_paths - - -def addsitedir(sitedir, known_paths=None): - """Add 'sitedir' argument to sys.path if missing and handle .pth files in - 'sitedir'""" - if known_paths is None: - known_paths = _init_pathinfo() - reset = 1 - else: - reset = 0 - sitedir, sitedircase = makepath(sitedir) - if not sitedircase in known_paths: - sys.path.append(sitedir) # Add path component - try: - names = os.listdir(sitedir) - except os.error: - return - dotpth = os.extsep + "pth" - names = [name for 
name in names if name.endswith(dotpth)] - for name in sorted(names): - addpackage(sitedir, name, known_paths) - if reset: - known_paths = None - return known_paths - - -def check_enableusersite(): - """Check if user site directory is safe for inclusion - - The function tests for the command line flag (including environment var), - process uid/gid equal to effective uid/gid. - - None: Disabled for security reasons - False: Disabled by user (command line option) - True: Safe and enabled - """ - if sys.flags.no_user_site: - return False - - if hasattr(os, "getuid") and hasattr(os, "geteuid"): - # check process uid == effective uid - if os.geteuid() != os.getuid(): - return None - if hasattr(os, "getgid") and hasattr(os, "getegid"): - # check process gid == effective gid - if os.getegid() != os.getgid(): - return None - - return True - -def getuserbase(): - """Returns the `user base` directory path. - - The `user base` directory can be used to store data. If the global - variable ``USER_BASE`` is not initialized yet, this function will also set - it. - """ - global USER_BASE - if USER_BASE is not None: - return USER_BASE - from sysconfig import get_config_var - USER_BASE = get_config_var('userbase') - return USER_BASE - -def getusersitepackages(): - """Returns the user-specific site-packages directory path. - - If the global variable ``USER_SITE`` is not initialized yet, this - function will also set it. 
- """ - global USER_SITE - user_base = getuserbase() # this will also set USER_BASE - - if USER_SITE is not None: - return USER_SITE - - from sysconfig import get_path - import os - - if sys.platform == 'darwin': - from sysconfig import get_config_var - if get_config_var('PYTHONFRAMEWORK'): - USER_SITE = get_path('purelib', 'osx_framework_user') - return USER_SITE - - USER_SITE = get_path('purelib', '%s_user' % os.name) - return USER_SITE - -def addusersitepackages(known_paths): - """Add a per user site-package to sys.path - - Each user has its own python directory with site-packages in the - home directory. - """ - # get the per user site-package path - # this call will also make sure USER_BASE and USER_SITE are set - user_site = getusersitepackages() - - if ENABLE_USER_SITE and os.path.isdir(user_site): - addsitedir(user_site, known_paths) - return known_paths - -def getsitepackages(): - """Returns a list containing all global site-packages directories - (and possibly site-python). - - For each directory present in the global ``PREFIXES``, this function - will find its `site-packages` subdirectory depending on the system - environment, and will return a list of full paths. 
- """ - sitepackages = [] - seen = set() - - for prefix in PREFIXES: - if not prefix or prefix in seen: - continue - seen.add(prefix) - - if sys.platform in ('os2emx', 'riscos'): - sitepackages.append(os.path.join(prefix, "Lib", "site-packages")) - elif os.sep == '/': - sitepackages.append(os.path.join(prefix, "lib", - "python" + sys.version[:3], - "site-packages")) - sitepackages.append(os.path.join(prefix, "lib", "site-python")) - else: - sitepackages.append(prefix) - sitepackages.append(os.path.join(prefix, "lib", "site-packages")) - return sitepackages - -def addsitepackages(known_paths): - """Add site-packages (and possibly site-python) to sys.path""" - for sitedir in getsitepackages(): - if os.path.isdir(sitedir): - addsitedir(sitedir, known_paths) - - return known_paths - -def setBEGINLIBPATH(): - """The OS/2 EMX port has optional extension modules that do double duty - as DLLs (and must use the .DLL file extension) for other extensions. - The library search path needs to be amended so these will be found - during module import. Use BEGINLIBPATH so that these are at the start - of the library search path. - - """ - dllpath = os.path.join(sys.prefix, "Lib", "lib-dynload") - libpath = os.environ['BEGINLIBPATH'].split(';') - if libpath[-1]: - libpath.append(dllpath) - else: - libpath[-1] = dllpath - os.environ['BEGINLIBPATH'] = ';'.join(libpath) - - -def setquit(): - """Define new builtins 'quit' and 'exit'. - - These are objects which make the interpreter exit when called. - The repr of each object contains a hint at how it works. - - """ - if os.sep == ':': - eof = 'Cmd-Q' - elif os.sep == '\\': - eof = 'Ctrl-Z plus Return' - else: - eof = 'Ctrl-D (i.e. EOF)' - - class Quitter(object): - def __init__(self, name): - self.name = name - def __repr__(self): - return 'Use %s() or %s to exit' % (self.name, eof) - def __call__(self, code=None): - # Shells like IDLE catch the SystemExit, but listen when their - # stdin wrapper is closed. 
- try: - sys.stdin.close() - except: - pass - raise SystemExit(code) - __builtin__.quit = Quitter('quit') - __builtin__.exit = Quitter('exit') - - -class _Printer(object): - """interactive prompt objects for printing the license text, a list of - contributors and the copyright notice.""" - - MAXLINES = 23 - - def __init__(self, name, data, files=(), dirs=()): - self.__name = name - self.__data = data - self.__files = files - self.__dirs = dirs - self.__lines = None - - def __setup(self): - if self.__lines: - return - data = None - for dir in self.__dirs: - for filename in self.__files: - filename = os.path.join(dir, filename) - try: - fp = file(filename, "rU") - data = fp.read() - fp.close() - break - except IOError: - pass - if data: - break - if not data: - data = self.__data - self.__lines = data.split('\n') - self.__linecnt = len(self.__lines) - - def __repr__(self): - self.__setup() - if len(self.__lines) <= self.MAXLINES: - return "\n".join(self.__lines) - else: - return "Type %s() to see the full %s text" % ((self.__name,)*2) - - def __call__(self): - self.__setup() - prompt = 'Hit Return for more, or q (and Return) to quit: ' - lineno = 0 - while 1: - try: - for i in range(lineno, lineno + self.MAXLINES): - print self.__lines[i] - except IndexError: - break - else: - lineno += self.MAXLINES - key = None - while key is None: - key = raw_input(prompt) - if key not in ('', 'q'): - key = None - if key == 'q': - break - -def setcopyright(): - """Set 'copyright' and 'credits' in __builtin__""" - __builtin__.copyright = _Printer("copyright", sys.copyright) - if sys.platform[:4] == 'java': - __builtin__.credits = _Printer( - "credits", - "Jython is maintained by the Jython developers (www.jython.org).") - else: - __builtin__.credits = _Printer("credits", """\ - Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands - for supporting Python development. 
See www.python.org for more information.""") - here = os.path.dirname(os.__file__) - __builtin__.license = _Printer( - "license", "See https://www.python.org/psf/license/", - ["LICENSE.txt", "LICENSE"], - [os.path.join(here, os.pardir), here, os.curdir]) - - -class _Helper(object): - """Define the builtin 'help'. - This is a wrapper around pydoc.help (with a twist). - - """ - - def __repr__(self): - return "Type help() for interactive help, " \ - "or help(object) for help about object." - def __call__(self, *args, **kwds): - import pydoc - return pydoc.help(*args, **kwds) - -def sethelper(): - __builtin__.help = _Helper() - -def aliasmbcs(): - """On Windows, some default encodings are not provided by Python, - while they are always available as "mbcs" in each locale. Make - them usable by aliasing to "mbcs" in such a case.""" - if sys.platform == 'win32': - import locale, codecs - enc = locale.getdefaultlocale()[1] - if enc.startswith('cp'): # "cp***" ? - try: - codecs.lookup(enc) - except LookupError: - import encodings - encodings._cache[enc] = encodings._unknown - encodings.aliases.aliases[enc] = 'mbcs' - -def setencoding(): - """Set the string encoding used by the Unicode implementation. The - default is 'ascii', but if you're willing to experiment, you can - change this.""" - encoding = "ascii" # Default value set by _PyUnicode_Init() - if 0: - # Enable to support locale aware default string encodings. - import locale - loc = locale.getdefaultlocale() - if loc[1]: - encoding = loc[1] - if 0: - # Enable to switch off string to Unicode coercion and implicit - # Unicode to string conversion. - encoding = "undefined" - if encoding != "ascii": - # On Non-Unicode builds this will raise an AttributeError... - sys.setdefaultencoding(encoding) # Needs Python Unicode build ! 
- - -def execsitecustomize(): - """Run custom site specific code, if available.""" - try: - import sitecustomize - except ImportError: - pass - except Exception: - if sys.flags.verbose: - sys.excepthook(*sys.exc_info()) - else: - print >>sys.stderr, \ - "'import sitecustomize' failed; use -v for traceback" - - -def execusercustomize(): - """Run custom user specific code, if available.""" - try: - import usercustomize - except ImportError: - pass - except Exception: - if sys.flags.verbose: - sys.excepthook(*sys.exc_info()) - else: - print>>sys.stderr, \ - "'import usercustomize' failed; use -v for traceback" - - -def main(): - global ENABLE_USER_SITE - - abs__file__() - known_paths = removeduppaths() - if ENABLE_USER_SITE is None: - ENABLE_USER_SITE = check_enableusersite() - known_paths = addusersitepackages(known_paths) - known_paths = addsitepackages(known_paths) - if sys.platform == 'os2emx': - setBEGINLIBPATH() - setquit() - setcopyright() - sethelper() - aliasmbcs() - setencoding() - execsitecustomize() - if ENABLE_USER_SITE: - execusercustomize() - # Remove sys.setdefaultencoding() so that users cannot change the - # encoding after initialization. The test for presence is needed when - # this module is run as a script, because this code is executed twice. - if hasattr(sys, "setdefaultencoding"): - del sys.setdefaultencoding - -main() - -def _script(): - help = """\ - %s [--user-base] [--user-site] - - Without arguments print some useful information - With arguments print the value of USER_BASE and/or USER_SITE separated - by '%s'. 
- - Exit codes with --user-base or --user-site: - 0 - user site directory is enabled - 1 - user site directory is disabled by user - 2 - uses site directory is disabled by super user - or for security reasons - >2 - unknown error - """ - args = sys.argv[1:] - if not args: - print "sys.path = [" - for dir in sys.path: - print " %r," % (dir,) - print "]" - print "USER_BASE: %r (%s)" % (USER_BASE, - "exists" if os.path.isdir(USER_BASE) else "doesn't exist") - print "USER_SITE: %r (%s)" % (USER_SITE, - "exists" if os.path.isdir(USER_SITE) else "doesn't exist") - print "ENABLE_USER_SITE: %r" % ENABLE_USER_SITE - sys.exit(0) - - buffer = [] - if '--user-base' in args: - buffer.append(USER_BASE) - if '--user-site' in args: - buffer.append(USER_SITE) - - if buffer: - print os.pathsep.join(buffer) - if ENABLE_USER_SITE: - sys.exit(0) - elif ENABLE_USER_SITE is False: - sys.exit(1) - elif ENABLE_USER_SITE is None: - sys.exit(2) - else: - sys.exit(3) - else: - import textwrap - print textwrap.dedent(help % (sys.argv[0], os.pathsep)) - sys.exit(10) - -if __name__ == '__main__': - _script() diff --git a/python/Lib/smtpd.py b/python/Lib/smtpd.py deleted file mode 100755 index b4d208b2ee..0000000000 --- a/python/Lib/smtpd.py +++ /dev/null @@ -1,555 +0,0 @@ -#! /usr/bin/env python -"""An RFC 2821 smtp proxy. - -Usage: %(program)s [options] [localhost:localport [remotehost:remoteport]] - -Options: - - --nosetuid - -n - This program generally tries to setuid `nobody', unless this flag is - set. The setuid call will fail if this program is not run as root (in - which case, use this flag). - - --version - -V - Print the version number and exit. - - --class classname - -c classname - Use `classname' as the concrete SMTP proxy class. Uses `PureProxy' by - default. - - --debug - -d - Turn on debugging prints. - - --help - -h - Print this message and exit. - -Version: %(__version__)s - -If localhost is not given then `localhost' is used, and if localport is not -given then 8025 is used. 
If remotehost is not given then `localhost' is used, -and if remoteport is not given, then 25 is used. -""" - -# Overview: -# -# This file implements the minimal SMTP protocol as defined in RFC 821. It -# has a hierarchy of classes which implement the backend functionality for the -# smtpd. A number of classes are provided: -# -# SMTPServer - the base class for the backend. Raises NotImplementedError -# if you try to use it. -# -# DebuggingServer - simply prints each message it receives on stdout. -# -# PureProxy - Proxies all messages to a real smtpd which does final -# delivery. One known problem with this class is that it doesn't handle -# SMTP errors from the backend server at all. This should be fixed -# (contributions are welcome!). -# -# MailmanProxy - An experimental hack to work with GNU Mailman -# . Using this server as your real incoming smtpd, your -# mailhost will automatically recognize and accept mail destined to Mailman -# lists when those lists are created. Every message not destined for a list -# gets forwarded to a real backend smtpd, as with PureProxy. Again, errors -# are not handled correctly yet. 
-# -# Please note that this script requires Python 2.0 -# -# Author: Barry Warsaw -# -# TODO: -# -# - support mailbox delivery -# - alias files -# - ESMTP -# - handle error codes from the backend smtpd - -import sys -import os -import errno -import getopt -import time -import socket -import asyncore -import asynchat - -__all__ = ["SMTPServer","DebuggingServer","PureProxy","MailmanProxy"] - -program = sys.argv[0] -__version__ = 'Python SMTP proxy version 0.2' - - -class Devnull: - def write(self, msg): pass - def flush(self): pass - - -DEBUGSTREAM = Devnull() -NEWLINE = '\n' -EMPTYSTRING = '' -COMMASPACE = ', ' - - -def usage(code, msg=''): - print >> sys.stderr, __doc__ % globals() - if msg: - print >> sys.stderr, msg - sys.exit(code) - - -class SMTPChannel(asynchat.async_chat): - COMMAND = 0 - DATA = 1 - - def __init__(self, server, conn, addr): - asynchat.async_chat.__init__(self, conn) - self.__server = server - self.__conn = conn - self.__addr = addr - self.__line = [] - self.__state = self.COMMAND - self.__greeting = 0 - self.__mailfrom = None - self.__rcpttos = [] - self.__data = '' - self.__fqdn = socket.getfqdn() - try: - self.__peer = conn.getpeername() - except socket.error, err: - # a race condition may occur if the other end is closing - # before we can get the peername - self.close() - if err[0] != errno.ENOTCONN: - raise - return - print >> DEBUGSTREAM, 'Peer:', repr(self.__peer) - self.push('220 %s %s' % (self.__fqdn, __version__)) - self.set_terminator('\r\n') - - # Overrides base class for convenience - def push(self, msg): - asynchat.async_chat.push(self, msg + '\r\n') - - # Implementation of base class abstract method - def collect_incoming_data(self, data): - self.__line.append(data) - - # Implementation of base class abstract method - def found_terminator(self): - line = EMPTYSTRING.join(self.__line) - print >> DEBUGSTREAM, 'Data:', repr(line) - self.__line = [] - if self.__state == self.COMMAND: - if not line: - self.push('500 Error: bad 
syntax') - return - method = None - i = line.find(' ') - if i < 0: - command = line.upper() - arg = None - else: - command = line[:i].upper() - arg = line[i+1:].strip() - method = getattr(self, 'smtp_' + command, None) - if not method: - self.push('502 Error: command "%s" not implemented' % command) - return - method(arg) - return - else: - if self.__state != self.DATA: - self.push('451 Internal confusion') - return - # Remove extraneous carriage returns and de-transparency according - # to RFC 821, Section 4.5.2. - data = [] - for text in line.split('\r\n'): - if text and text[0] == '.': - data.append(text[1:]) - else: - data.append(text) - self.__data = NEWLINE.join(data) - status = self.__server.process_message(self.__peer, - self.__mailfrom, - self.__rcpttos, - self.__data) - self.__rcpttos = [] - self.__mailfrom = None - self.__state = self.COMMAND - self.set_terminator('\r\n') - if not status: - self.push('250 Ok') - else: - self.push(status) - - # SMTP and ESMTP commands - def smtp_HELO(self, arg): - if not arg: - self.push('501 Syntax: HELO hostname') - return - if self.__greeting: - self.push('503 Duplicate HELO/EHLO') - else: - self.__greeting = arg - self.push('250 %s' % self.__fqdn) - - def smtp_NOOP(self, arg): - if arg: - self.push('501 Syntax: NOOP') - else: - self.push('250 Ok') - - def smtp_QUIT(self, arg): - # args is ignored - self.push('221 Bye') - self.close_when_done() - - # factored - def __getaddr(self, keyword, arg): - address = None - keylen = len(keyword) - if arg[:keylen].upper() == keyword: - address = arg[keylen:].strip() - if not address: - pass - elif address[0] == '<' and address[-1] == '>' and address != '<>': - # Addresses can be in the form but watch out - # for null address, e.g. <> - address = address[1:-1] - return address - - def smtp_MAIL(self, arg): - print >> DEBUGSTREAM, '===> MAIL', arg - address = self.__getaddr('FROM:', arg) if arg else None - if not address: - self.push('501 Syntax: MAIL FROM:
') - return - if self.__mailfrom: - self.push('503 Error: nested MAIL command') - return - self.__mailfrom = address - print >> DEBUGSTREAM, 'sender:', self.__mailfrom - self.push('250 Ok') - - def smtp_RCPT(self, arg): - print >> DEBUGSTREAM, '===> RCPT', arg - if not self.__mailfrom: - self.push('503 Error: need MAIL command') - return - address = self.__getaddr('TO:', arg) if arg else None - if not address: - self.push('501 Syntax: RCPT TO:
') - return - self.__rcpttos.append(address) - print >> DEBUGSTREAM, 'recips:', self.__rcpttos - self.push('250 Ok') - - def smtp_RSET(self, arg): - if arg: - self.push('501 Syntax: RSET') - return - # Resets the sender, recipients, and data, but not the greeting - self.__mailfrom = None - self.__rcpttos = [] - self.__data = '' - self.__state = self.COMMAND - self.push('250 Ok') - - def smtp_DATA(self, arg): - if not self.__rcpttos: - self.push('503 Error: need RCPT command') - return - if arg: - self.push('501 Syntax: DATA') - return - self.__state = self.DATA - self.set_terminator('\r\n.\r\n') - self.push('354 End data with .') - - -class SMTPServer(asyncore.dispatcher): - def __init__(self, localaddr, remoteaddr): - self._localaddr = localaddr - self._remoteaddr = remoteaddr - asyncore.dispatcher.__init__(self) - try: - self.create_socket(socket.AF_INET, socket.SOCK_STREAM) - # try to re-use a server port if possible - self.set_reuse_addr() - self.bind(localaddr) - self.listen(5) - except: - # cleanup asyncore.socket_map before raising - self.close() - raise - else: - print >> DEBUGSTREAM, \ - '%s started at %s\n\tLocal addr: %s\n\tRemote addr:%s' % ( - self.__class__.__name__, time.ctime(time.time()), - localaddr, remoteaddr) - - def handle_accept(self): - pair = self.accept() - if pair is not None: - conn, addr = pair - print >> DEBUGSTREAM, 'Incoming connection from %s' % repr(addr) - channel = SMTPChannel(self, conn, addr) - - # API for "doing something useful with the message" - def process_message(self, peer, mailfrom, rcpttos, data): - """Override this abstract method to handle messages from the client. - - peer is a tuple containing (ipaddr, port) of the client that made the - socket connection to our smtp port. - - mailfrom is the raw address the client claims the message is coming - from. - - rcpttos is a list of raw addresses the client wishes to deliver the - message to. 
- - data is a string containing the entire full text of the message, - headers (if supplied) and all. It has been `de-transparencied' - according to RFC 821, Section 4.5.2. In other words, a line - containing a `.' followed by other text has had the leading dot - removed. - - This function should return None, for a normal `250 Ok' response; - otherwise it returns the desired response string in RFC 821 format. - - """ - raise NotImplementedError - - -class DebuggingServer(SMTPServer): - # Do something with the gathered message - def process_message(self, peer, mailfrom, rcpttos, data): - inheaders = 1 - lines = data.split('\n') - print '---------- MESSAGE FOLLOWS ----------' - for line in lines: - # headers first - if inheaders and not line: - print 'X-Peer:', peer[0] - inheaders = 0 - print line - print '------------ END MESSAGE ------------' - - -class PureProxy(SMTPServer): - def process_message(self, peer, mailfrom, rcpttos, data): - lines = data.split('\n') - # Look for the last header - i = 0 - for line in lines: - if not line: - break - i += 1 - lines.insert(i, 'X-Peer: %s' % peer[0]) - data = NEWLINE.join(lines) - refused = self._deliver(mailfrom, rcpttos, data) - # TBD: what to do with refused addresses? - print >> DEBUGSTREAM, 'we got some refusals:', refused - - def _deliver(self, mailfrom, rcpttos, data): - import smtplib - refused = {} - try: - s = smtplib.SMTP() - s.connect(self._remoteaddr[0], self._remoteaddr[1]) - try: - refused = s.sendmail(mailfrom, rcpttos, data) - finally: - s.quit() - except smtplib.SMTPRecipientsRefused, e: - print >> DEBUGSTREAM, 'got SMTPRecipientsRefused' - refused = e.recipients - except (socket.error, smtplib.SMTPException), e: - print >> DEBUGSTREAM, 'got', e.__class__ - # All recipients were refused. If the exception had an associated - # error code, use it. Otherwise,fake it with a non-triggering - # exception code. 
- errcode = getattr(e, 'smtp_code', -1) - errmsg = getattr(e, 'smtp_error', 'ignore') - for r in rcpttos: - refused[r] = (errcode, errmsg) - return refused - - -class MailmanProxy(PureProxy): - def process_message(self, peer, mailfrom, rcpttos, data): - from cStringIO import StringIO - from Mailman import Utils - from Mailman import Message - from Mailman import MailList - # If the message is to a Mailman mailing list, then we'll invoke the - # Mailman script directly, without going through the real smtpd. - # Otherwise we'll forward it to the local proxy for disposition. - listnames = [] - for rcpt in rcpttos: - local = rcpt.lower().split('@')[0] - # We allow the following variations on the theme - # listname - # listname-admin - # listname-owner - # listname-request - # listname-join - # listname-leave - parts = local.split('-') - if len(parts) > 2: - continue - listname = parts[0] - if len(parts) == 2: - command = parts[1] - else: - command = '' - if not Utils.list_exists(listname) or command not in ( - '', 'admin', 'owner', 'request', 'join', 'leave'): - continue - listnames.append((rcpt, listname, command)) - # Remove all list recipients from rcpttos and forward what we're not - # going to take care of ourselves. Linear removal should be fine - # since we don't expect a large number of recipients. - for rcpt, listname, command in listnames: - rcpttos.remove(rcpt) - # If there's any non-list destined recipients left, - print >> DEBUGSTREAM, 'forwarding recips:', ' '.join(rcpttos) - if rcpttos: - refused = self._deliver(mailfrom, rcpttos, data) - # TBD: what to do with refused addresses? - print >> DEBUGSTREAM, 'we got refusals:', refused - # Now deliver directly to the list commands - mlists = {} - s = StringIO(data) - msg = Message.Message(s) - # These headers are required for the proper execution of Mailman. All - # MTAs in existence seem to add these if the original message doesn't - # have them. 
- if not msg.getheader('from'): - msg['From'] = mailfrom - if not msg.getheader('date'): - msg['Date'] = time.ctime(time.time()) - for rcpt, listname, command in listnames: - print >> DEBUGSTREAM, 'sending message to', rcpt - mlist = mlists.get(listname) - if not mlist: - mlist = MailList.MailList(listname, lock=0) - mlists[listname] = mlist - # dispatch on the type of command - if command == '': - # post - msg.Enqueue(mlist, tolist=1) - elif command == 'admin': - msg.Enqueue(mlist, toadmin=1) - elif command == 'owner': - msg.Enqueue(mlist, toowner=1) - elif command == 'request': - msg.Enqueue(mlist, torequest=1) - elif command in ('join', 'leave'): - # TBD: this is a hack! - if command == 'join': - msg['Subject'] = 'subscribe' - else: - msg['Subject'] = 'unsubscribe' - msg.Enqueue(mlist, torequest=1) - - -class Options: - setuid = 1 - classname = 'PureProxy' - - -def parseargs(): - global DEBUGSTREAM - try: - opts, args = getopt.getopt( - sys.argv[1:], 'nVhc:d', - ['class=', 'nosetuid', 'version', 'help', 'debug']) - except getopt.error, e: - usage(1, e) - - options = Options() - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-V', '--version'): - print >> sys.stderr, __version__ - sys.exit(0) - elif opt in ('-n', '--nosetuid'): - options.setuid = 0 - elif opt in ('-c', '--class'): - options.classname = arg - elif opt in ('-d', '--debug'): - DEBUGSTREAM = sys.stderr - - # parse the rest of the arguments - if len(args) < 1: - localspec = 'localhost:8025' - remotespec = 'localhost:25' - elif len(args) < 2: - localspec = args[0] - remotespec = 'localhost:25' - elif len(args) < 3: - localspec = args[0] - remotespec = args[1] - else: - usage(1, 'Invalid arguments: %s' % COMMASPACE.join(args)) - - # split into host/port pairs - i = localspec.find(':') - if i < 0: - usage(1, 'Bad local spec: %s' % localspec) - options.localhost = localspec[:i] - try: - options.localport = int(localspec[i+1:]) - except ValueError: - usage(1, 'Bad local port: 
%s' % localspec) - i = remotespec.find(':') - if i < 0: - usage(1, 'Bad remote spec: %s' % remotespec) - options.remotehost = remotespec[:i] - try: - options.remoteport = int(remotespec[i+1:]) - except ValueError: - usage(1, 'Bad remote port: %s' % remotespec) - return options - - -if __name__ == '__main__': - options = parseargs() - # Become nobody - classname = options.classname - if "." in classname: - lastdot = classname.rfind(".") - mod = __import__(classname[:lastdot], globals(), locals(), [""]) - classname = classname[lastdot+1:] - else: - import __main__ as mod - class_ = getattr(mod, classname) - proxy = class_((options.localhost, options.localport), - (options.remotehost, options.remoteport)) - if options.setuid: - try: - import pwd - except ImportError: - print >> sys.stderr, \ - 'Cannot import module "pwd"; try running with -n option.' - sys.exit(1) - nobody = pwd.getpwnam('nobody')[2] - try: - os.setuid(nobody) - except OSError, e: - if e.errno != errno.EPERM: raise - print >> sys.stderr, \ - 'Cannot setuid "nobody"; try running with -n option.' - sys.exit(1) - try: - asyncore.loop() - except KeyboardInterrupt: - pass diff --git a/python/Lib/smtplib.py b/python/Lib/smtplib.py deleted file mode 100755 index e1651c0a8b..0000000000 --- a/python/Lib/smtplib.py +++ /dev/null @@ -1,883 +0,0 @@ -#! /usr/bin/env python - -'''SMTP/ESMTP client class. - -This should follow RFC 821 (SMTP), RFC 1869 (ESMTP), RFC 2554 (SMTP -Authentication) and RFC 2487 (Secure SMTP over TLS). - -Notes: - -Please remember, when doing ESMTP, that the names of the SMTP service -extensions are NOT the same thing as the option keywords for the RCPT -and MAIL commands! - -Example: - - >>> import smtplib - >>> s=smtplib.SMTP("localhost") - >>> print s.help() - This is Sendmail version 8.8.4 - Topics: - HELO EHLO MAIL RCPT DATA - RSET NOOP QUIT HELP VRFY - EXPN VERB ETRN DSN - For more info use "HELP ". - To report bugs in the implementation send email to - sendmail-bugs@sendmail.org. 
- For local information send email to Postmaster at your site. - End of HELP info - >>> s.putcmd("vrfy","someone@here") - >>> s.getreply() - (250, "Somebody OverHere ") - >>> s.quit() -''' - -# Author: The Dragon De Monsyne -# ESMTP support, test code and doc fixes added by -# Eric S. Raymond -# Better RFC 821 compliance (MAIL and RCPT, and CRLF in data) -# by Carey Evans , for picky mail servers. -# RFC 2554 (authentication) support by Gerhard Haering . -# -# This was modified from the Python 1.5 library HTTP lib. - -import socket -import re -import email.utils -import base64 -import hmac -from email.base64mime import encode as encode_base64 -from sys import stderr - -__all__ = ["SMTPException", "SMTPServerDisconnected", "SMTPResponseException", - "SMTPSenderRefused", "SMTPRecipientsRefused", "SMTPDataError", - "SMTPConnectError", "SMTPHeloError", "SMTPAuthenticationError", - "quoteaddr", "quotedata", "SMTP"] - -SMTP_PORT = 25 -SMTP_SSL_PORT = 465 -CRLF = "\r\n" -_MAXLINE = 8192 # more than 8 times larger than RFC 821, 4.5.3 - -OLDSTYLE_AUTH = re.compile(r"auth=(.*)", re.I) - - -# Exception classes used by this module. -class SMTPException(Exception): - """Base class for all exceptions raised by this module.""" - -class SMTPServerDisconnected(SMTPException): - """Not connected to any SMTP server. - - This exception is raised when the server unexpectedly disconnects, - or when an attempt is made to use the SMTP instance before - connecting it to a server. - """ - -class SMTPResponseException(SMTPException): - """Base class for all exceptions that include an SMTP error code. - - These exceptions are generated in some instances when the SMTP - server returns an error code. The error code is stored in the - `smtp_code' attribute of the error, and the `smtp_error' attribute - is set to the error message. 
- """ - - def __init__(self, code, msg): - self.smtp_code = code - self.smtp_error = msg - self.args = (code, msg) - -class SMTPSenderRefused(SMTPResponseException): - """Sender address refused. - - In addition to the attributes set by on all SMTPResponseException - exceptions, this sets `sender' to the string that the SMTP refused. - """ - - def __init__(self, code, msg, sender): - self.smtp_code = code - self.smtp_error = msg - self.sender = sender - self.args = (code, msg, sender) - -class SMTPRecipientsRefused(SMTPException): - """All recipient addresses refused. - - The errors for each recipient are accessible through the attribute - 'recipients', which is a dictionary of exactly the same sort as - SMTP.sendmail() returns. - """ - - def __init__(self, recipients): - self.recipients = recipients - self.args = (recipients,) - - -class SMTPDataError(SMTPResponseException): - """The SMTP server didn't accept the data.""" - -class SMTPConnectError(SMTPResponseException): - """Error during connection establishment.""" - -class SMTPHeloError(SMTPResponseException): - """The server refused our HELO reply.""" - -class SMTPAuthenticationError(SMTPResponseException): - """Authentication error. - - Most probably the server didn't accept the username/password - combination provided. - """ - - -def quoteaddr(addr): - """Quote a subset of the email addresses defined by RFC 821. - - Should be able to handle anything rfc822.parseaddr can handle. - """ - m = (None, None) - try: - m = email.utils.parseaddr(addr)[1] - except AttributeError: - pass - if m == (None, None): # Indicates parse failure or AttributeError - # something weird here.. punt -ddm - return "<%s>" % addr - elif m is None: - # the sender wants an empty return address - return "<>" - else: - return "<%s>" % m - -def _addr_only(addrstring): - displayname, addr = email.utils.parseaddr(addrstring) - if (displayname, addr) == ('', ''): - # parseaddr couldn't parse it, so use it as is. 
- return addrstring - return addr - -def quotedata(data): - """Quote data for email. - - Double leading '.', and change Unix newline '\\n', or Mac '\\r' into - Internet CRLF end-of-line. - """ - return re.sub(r'(?m)^\.', '..', - re.sub(r'(?:\r\n|\n|\r(?!\n))', CRLF, data)) - - -try: - import ssl -except ImportError: - _have_ssl = False -else: - class SSLFakeFile: - """A fake file like object that really wraps a SSLObject. - - It only supports what is needed in smtplib. - """ - def __init__(self, sslobj): - self.sslobj = sslobj - - def readline(self, size=-1): - if size < 0: - size = None - str = "" - chr = None - while chr != "\n": - if size is not None and len(str) >= size: - break - chr = self.sslobj.read(1) - if not chr: - break - str += chr - return str - - def close(self): - pass - - _have_ssl = True - -class SMTP: - """This class manages a connection to an SMTP or ESMTP server. - SMTP Objects: - SMTP objects have the following attributes: - helo_resp - This is the message given by the server in response to the - most recent HELO command. - - ehlo_resp - This is the message given by the server in response to the - most recent EHLO command. This is usually multiline. - - does_esmtp - This is a True value _after you do an EHLO command_, if the - server supports ESMTP. - - esmtp_features - This is a dictionary, which, if the server supports ESMTP, - will _after you do an EHLO command_, contain the names of the - SMTP service extensions this server supports, and their - parameters (if any). - - Note, all extension names are mapped to lower case in the - dictionary. - - See each method's docstrings for details. In general, there is a - method of the same name to perform each SMTP command. There is also a - method called 'sendmail' that will do an entire mail transaction. 
- """ - debuglevel = 0 - file = None - helo_resp = None - ehlo_msg = "ehlo" - ehlo_resp = None - does_esmtp = 0 - default_port = SMTP_PORT - - def __init__(self, host='', port=0, local_hostname=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - """Initialize a new instance. - - If specified, `host' is the name of the remote host to which to - connect. If specified, `port' specifies the port to which to connect. - By default, smtplib.SMTP_PORT is used. If a host is specified the - connect method is called, and if it returns anything other than a - success code an SMTPConnectError is raised. If specified, - `local_hostname` is used as the FQDN of the local host for the - HELO/EHLO command. Otherwise, the local hostname is found using - socket.getfqdn(). - - """ - self.timeout = timeout - self.esmtp_features = {} - if host: - (code, msg) = self.connect(host, port) - if code != 220: - raise SMTPConnectError(code, msg) - if local_hostname is not None: - self.local_hostname = local_hostname - else: - # RFC 2821 says we should use the fqdn in the EHLO/HELO verb, and - # if that can't be calculated, that we should use a domain literal - # instead (essentially an encoded IP address like [A.B.C.D]). - fqdn = socket.getfqdn() - if '.' in fqdn: - self.local_hostname = fqdn - else: - # We can't find an fqdn hostname, so use a domain literal - addr = '127.0.0.1' - try: - addr = socket.gethostbyname(socket.gethostname()) - except socket.gaierror: - pass - self.local_hostname = '[%s]' % addr - - def set_debuglevel(self, debuglevel): - """Set the debug output level. - - A non-false value results in debug messages for connection and for all - messages sent to and received from the server. - - """ - self.debuglevel = debuglevel - - def _get_socket(self, host, port, timeout): - # This makes it simpler for SMTP_SSL to use the SMTP connect code - # and just alter the socket connection bit. 
- if self.debuglevel > 0: - print>>stderr, 'connect:', (host, port) - return socket.create_connection((host, port), timeout) - - def connect(self, host='localhost', port=0): - """Connect to a host on a given port. - - If the hostname ends with a colon (`:') followed by a number, and - there is no port specified, that suffix will be stripped off and the - number interpreted as the port number to use. - - Note: This method is automatically invoked by __init__, if a host is - specified during instantiation. - - """ - if not port and (host.find(':') == host.rfind(':')): - i = host.rfind(':') - if i >= 0: - host, port = host[:i], host[i + 1:] - try: - port = int(port) - except ValueError: - raise socket.error, "nonnumeric port" - if not port: - port = self.default_port - if self.debuglevel > 0: - print>>stderr, 'connect:', (host, port) - self.sock = self._get_socket(host, port, self.timeout) - (code, msg) = self.getreply() - if self.debuglevel > 0: - print>>stderr, "connect:", msg - return (code, msg) - - def send(self, str): - """Send `str' to the server.""" - if self.debuglevel > 0: - print>>stderr, 'send:', repr(str) - if hasattr(self, 'sock') and self.sock: - try: - self.sock.sendall(str) - except socket.error: - self.close() - raise SMTPServerDisconnected('Server not connected') - else: - raise SMTPServerDisconnected('please run connect() first') - - def putcmd(self, cmd, args=""): - """Send a command to the server.""" - if args == "": - str = '%s%s' % (cmd, CRLF) - else: - str = '%s %s%s' % (cmd, args, CRLF) - self.send(str) - - def getreply(self): - """Get a reply from the server. - - Returns a tuple consisting of: - - - server response code (e.g. '250', or such, if all goes well) - Note: returns -1 if it can't read response code. - - - server response string corresponding to response code (multiline - responses are converted to a single, multiline string). - - Raises SMTPServerDisconnected if end-of-file is reached. 
- """ - resp = [] - if self.file is None: - self.file = self.sock.makefile('rb') - while 1: - try: - line = self.file.readline(_MAXLINE + 1) - except socket.error as e: - self.close() - raise SMTPServerDisconnected("Connection unexpectedly closed: " - + str(e)) - if line == '': - self.close() - raise SMTPServerDisconnected("Connection unexpectedly closed") - if self.debuglevel > 0: - print>>stderr, 'reply:', repr(line) - if len(line) > _MAXLINE: - raise SMTPResponseException(500, "Line too long.") - resp.append(line[4:].strip()) - code = line[:3] - # Check that the error code is syntactically correct. - # Don't attempt to read a continuation line if it is broken. - try: - errcode = int(code) - except ValueError: - errcode = -1 - break - # Check if multiline response. - if line[3:4] != "-": - break - - errmsg = "\n".join(resp) - if self.debuglevel > 0: - print>>stderr, 'reply: retcode (%s); Msg: %s' % (errcode, errmsg) - return errcode, errmsg - - def docmd(self, cmd, args=""): - """Send a command, and return its response code.""" - self.putcmd(cmd, args) - return self.getreply() - - # std smtp commands - def helo(self, name=''): - """SMTP 'helo' command. - Hostname to send for this command defaults to the FQDN of the local - host. - """ - self.putcmd("helo", name or self.local_hostname) - (code, msg) = self.getreply() - self.helo_resp = msg - return (code, msg) - - def ehlo(self, name=''): - """ SMTP 'ehlo' command. - Hostname to send for this command defaults to the FQDN of the local - host. - """ - self.esmtp_features = {} - self.putcmd(self.ehlo_msg, name or self.local_hostname) - (code, msg) = self.getreply() - # According to RFC1869 some (badly written) - # MTA's will disconnect on an ehlo. 
Toss an exception if - # that happens -ddm - if code == -1 and len(msg) == 0: - self.close() - raise SMTPServerDisconnected("Server not connected") - self.ehlo_resp = msg - if code != 250: - return (code, msg) - self.does_esmtp = 1 - #parse the ehlo response -ddm - resp = self.ehlo_resp.split('\n') - del resp[0] - for each in resp: - # To be able to communicate with as many SMTP servers as possible, - # we have to take the old-style auth advertisement into account, - # because: - # 1) Else our SMTP feature parser gets confused. - # 2) There are some servers that only advertise the auth methods we - # support using the old style. - auth_match = OLDSTYLE_AUTH.match(each) - if auth_match: - # This doesn't remove duplicates, but that's no problem - self.esmtp_features["auth"] = self.esmtp_features.get("auth", "") \ - + " " + auth_match.groups(0)[0] - continue - - # RFC 1869 requires a space between ehlo keyword and parameters. - # It's actually stricter, in that only spaces are allowed between - # parameters, but were not going to check for that here. Note - # that the space isn't present if there are no parameters. - m = re.match(r'(?P[A-Za-z0-9][A-Za-z0-9\-]*) ?', each) - if m: - feature = m.group("feature").lower() - params = m.string[m.end("feature"):].strip() - if feature == "auth": - self.esmtp_features[feature] = self.esmtp_features.get(feature, "") \ - + " " + params - else: - self.esmtp_features[feature] = params - return (code, msg) - - def has_extn(self, opt): - """Does the server support a given SMTP service extension?""" - return opt.lower() in self.esmtp_features - - def help(self, args=''): - """SMTP 'help' command. 
- Returns help text from server.""" - self.putcmd("help", args) - return self.getreply()[1] - - def rset(self): - """SMTP 'rset' command -- resets session.""" - return self.docmd("rset") - - def noop(self): - """SMTP 'noop' command -- doesn't do anything :>""" - return self.docmd("noop") - - def mail(self, sender, options=[]): - """SMTP 'mail' command -- begins mail xfer session.""" - optionlist = '' - if options and self.does_esmtp: - optionlist = ' ' + ' '.join(options) - self.putcmd("mail", "FROM:%s%s" % (quoteaddr(sender), optionlist)) - return self.getreply() - - def rcpt(self, recip, options=[]): - """SMTP 'rcpt' command -- indicates 1 recipient for this mail.""" - optionlist = '' - if options and self.does_esmtp: - optionlist = ' ' + ' '.join(options) - self.putcmd("rcpt", "TO:%s%s" % (quoteaddr(recip), optionlist)) - return self.getreply() - - def data(self, msg): - """SMTP 'DATA' command -- sends message data to server. - - Automatically quotes lines beginning with a period per rfc821. - Raises SMTPDataError if there is an unexpected reply to the - DATA command; the return value from this method is the final - response code received when the all data is sent. - """ - self.putcmd("data") - (code, repl) = self.getreply() - if self.debuglevel > 0: - print>>stderr, "data:", (code, repl) - if code != 354: - raise SMTPDataError(code, repl) - else: - q = quotedata(msg) - if q[-2:] != CRLF: - q = q + CRLF - q = q + "." + CRLF - self.send(q) - (code, msg) = self.getreply() - if self.debuglevel > 0: - print>>stderr, "data:", (code, msg) - return (code, msg) - - def verify(self, address): - """SMTP 'verify' command -- checks for address validity.""" - self.putcmd("vrfy", _addr_only(address)) - return self.getreply() - # a.k.a. 
- vrfy = verify - - def expn(self, address): - """SMTP 'expn' command -- expands a mailing list.""" - self.putcmd("expn", _addr_only(address)) - return self.getreply() - - # some useful methods - - def ehlo_or_helo_if_needed(self): - """Call self.ehlo() and/or self.helo() if needed. - - If there has been no previous EHLO or HELO command this session, this - method tries ESMTP EHLO first. - - This method may raise the following exceptions: - - SMTPHeloError The server didn't reply properly to - the helo greeting. - """ - if self.helo_resp is None and self.ehlo_resp is None: - if not (200 <= self.ehlo()[0] <= 299): - (code, resp) = self.helo() - if not (200 <= code <= 299): - raise SMTPHeloError(code, resp) - - def login(self, user, password): - """Log in on an SMTP server that requires authentication. - - The arguments are: - - user: The user name to authenticate with. - - password: The password for the authentication. - - If there has been no previous EHLO or HELO command this session, this - method tries ESMTP EHLO first. - - This method will return normally if the authentication was successful. - - This method may raise the following exceptions: - - SMTPHeloError The server didn't reply properly to - the helo greeting. - SMTPAuthenticationError The server didn't accept the username/ - password combination. - SMTPException No suitable authentication method was - found. 
- """ - - def encode_cram_md5(challenge, user, password): - challenge = base64.decodestring(challenge) - response = user + " " + hmac.HMAC(password, challenge).hexdigest() - return encode_base64(response, eol="") - - def encode_plain(user, password): - return encode_base64("\0%s\0%s" % (user, password), eol="") - - - AUTH_PLAIN = "PLAIN" - AUTH_CRAM_MD5 = "CRAM-MD5" - AUTH_LOGIN = "LOGIN" - - self.ehlo_or_helo_if_needed() - - if not self.has_extn("auth"): - raise SMTPException("SMTP AUTH extension not supported by server.") - - # Authentication methods the server supports: - authlist = self.esmtp_features["auth"].split() - - # List of authentication methods we support: from preferred to - # less preferred methods. Except for the purpose of testing the weaker - # ones, we prefer stronger methods like CRAM-MD5: - preferred_auths = [AUTH_CRAM_MD5, AUTH_PLAIN, AUTH_LOGIN] - - # Determine the authentication method we'll use - authmethod = None - for method in preferred_auths: - if method in authlist: - authmethod = method - break - - if authmethod == AUTH_CRAM_MD5: - (code, resp) = self.docmd("AUTH", AUTH_CRAM_MD5) - if code == 503: - # 503 == 'Error: already authenticated' - return (code, resp) - (code, resp) = self.docmd(encode_cram_md5(resp, user, password)) - elif authmethod == AUTH_PLAIN: - (code, resp) = self.docmd("AUTH", - AUTH_PLAIN + " " + encode_plain(user, password)) - elif authmethod == AUTH_LOGIN: - (code, resp) = self.docmd("AUTH", - "%s %s" % (AUTH_LOGIN, encode_base64(user, eol=""))) - if code != 334: - raise SMTPAuthenticationError(code, resp) - (code, resp) = self.docmd(encode_base64(password, eol="")) - elif authmethod is None: - raise SMTPException("No suitable authentication method found.") - if code not in (235, 503): - # 235 == 'Authentication successful' - # 503 == 'Error: already authenticated' - raise SMTPAuthenticationError(code, resp) - return (code, resp) - - def starttls(self, keyfile=None, certfile=None): - """Puts the connection to the 
SMTP server into TLS mode. - - If there has been no previous EHLO or HELO command this session, this - method tries ESMTP EHLO first. - - If the server supports TLS, this will encrypt the rest of the SMTP - session. If you provide the keyfile and certfile parameters, - the identity of the SMTP server and client can be checked. This, - however, depends on whether the socket module really checks the - certificates. - - This method may raise the following exceptions: - - SMTPHeloError The server didn't reply properly to - the helo greeting. - """ - self.ehlo_or_helo_if_needed() - if not self.has_extn("starttls"): - raise SMTPException("STARTTLS extension not supported by server.") - (resp, reply) = self.docmd("STARTTLS") - if resp == 220: - if not _have_ssl: - raise RuntimeError("No SSL support included in this Python") - self.sock = ssl.wrap_socket(self.sock, keyfile, certfile) - self.file = SSLFakeFile(self.sock) - # RFC 3207: - # The client MUST discard any knowledge obtained from - # the server, such as the list of SMTP service extensions, - # which was not obtained from the TLS negotiation itself. - self.helo_resp = None - self.ehlo_resp = None - self.esmtp_features = {} - self.does_esmtp = 0 - else: - # RFC 3207: - # 501 Syntax error (no parameters allowed) - # 454 TLS not available due to temporary reason - raise SMTPResponseException(resp, reply) - return (resp, reply) - - def sendmail(self, from_addr, to_addrs, msg, mail_options=[], - rcpt_options=[]): - """This command performs an entire mail transaction. - - The arguments are: - - from_addr : The address sending this mail. - - to_addrs : A list of addresses to send this mail to. A bare - string will be treated as a list with 1 address. - - msg : The message to send. - - mail_options : List of ESMTP options (such as 8bitmime) for the - mail command. - - rcpt_options : List of ESMTP options (such as DSN commands) for - all the rcpt commands. 
- - If there has been no previous EHLO or HELO command this session, this - method tries ESMTP EHLO first. If the server does ESMTP, message size - and each of the specified options will be passed to it. If EHLO - fails, HELO will be tried and ESMTP options suppressed. - - This method will return normally if the mail is accepted for at least - one recipient. It returns a dictionary, with one entry for each - recipient that was refused. Each entry contains a tuple of the SMTP - error code and the accompanying error message sent by the server. - - This method may raise the following exceptions: - - SMTPHeloError The server didn't reply properly to - the helo greeting. - SMTPRecipientsRefused The server rejected ALL recipients - (no mail was sent). - SMTPSenderRefused The server didn't accept the from_addr. - SMTPDataError The server replied with an unexpected - error code (other than a refusal of - a recipient). - - Note: the connection will be open even after an exception is raised. - - Example: - - >>> import smtplib - >>> s=smtplib.SMTP("localhost") - >>> tolist=["one@one.org","two@two.org","three@three.org","four@four.org"] - >>> msg = '''\\ - ... From: Me@my.org - ... Subject: testin'... - ... - ... This is a test ''' - >>> s.sendmail("me@my.org",tolist,msg) - { "three@three.org" : ( 550 ,"User unknown" ) } - >>> s.quit() - - In the above example, the message was accepted for delivery to three - of the four addresses, and one was rejected, with the error code - 550. If all addresses are accepted, then the method will return an - empty dictionary. - - """ - self.ehlo_or_helo_if_needed() - esmtp_opts = [] - if self.does_esmtp: - # Hmmm? what's this? 
-ddm - # self.esmtp_features['7bit']="" - if self.has_extn('size'): - esmtp_opts.append("size=%d" % len(msg)) - for option in mail_options: - esmtp_opts.append(option) - - (code, resp) = self.mail(from_addr, esmtp_opts) - if code != 250: - self.rset() - raise SMTPSenderRefused(code, resp, from_addr) - senderrs = {} - if isinstance(to_addrs, basestring): - to_addrs = [to_addrs] - for each in to_addrs: - (code, resp) = self.rcpt(each, rcpt_options) - if (code != 250) and (code != 251): - senderrs[each] = (code, resp) - if len(senderrs) == len(to_addrs): - # the server refused all our recipients - self.rset() - raise SMTPRecipientsRefused(senderrs) - (code, resp) = self.data(msg) - if code != 250: - self.rset() - raise SMTPDataError(code, resp) - #if we got here then somebody got our mail - return senderrs - - - def close(self): - """Close the connection to the SMTP server.""" - try: - file = self.file - self.file = None - if file: - file.close() - finally: - sock = self.sock - self.sock = None - if sock: - sock.close() - - - def quit(self): - """Terminate the SMTP session.""" - res = self.docmd("quit") - # A new EHLO is required after reconnecting with connect() - self.ehlo_resp = self.helo_resp = None - self.esmtp_features = {} - self.does_esmtp = False - self.close() - return res - -if _have_ssl: - - class SMTP_SSL(SMTP): - """ This is a subclass derived from SMTP that connects over an SSL - encrypted socket (to use this class you need a socket module that was - compiled with SSL support). If host is not specified, '' (the local - host) is used. If port is omitted, the standard SMTP-over-SSL port - (465) is used. local_hostname has the same meaning as it does in the - SMTP class. keyfile and certfile are also optional - they can contain - a PEM formatted private key and certificate chain file for the SSL - connection. 
- - """ - - default_port = SMTP_SSL_PORT - - def __init__(self, host='', port=0, local_hostname=None, - keyfile=None, certfile=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - self.keyfile = keyfile - self.certfile = certfile - SMTP.__init__(self, host, port, local_hostname, timeout) - - def _get_socket(self, host, port, timeout): - if self.debuglevel > 0: - print>>stderr, 'connect:', (host, port) - new_socket = socket.create_connection((host, port), timeout) - new_socket = ssl.wrap_socket(new_socket, self.keyfile, self.certfile) - self.file = SSLFakeFile(new_socket) - return new_socket - - __all__.append("SMTP_SSL") - -# -# LMTP extension -# -LMTP_PORT = 2003 - -class LMTP(SMTP): - """LMTP - Local Mail Transfer Protocol - - The LMTP protocol, which is very similar to ESMTP, is heavily based - on the standard SMTP client. It's common to use Unix sockets for - LMTP, so our connect() method must support that as well as a regular - host:port server. local_hostname has the same meaning as it does in - the SMTP class. To specify a Unix socket, you must use an absolute - path as the host, starting with a '/'. - - Authentication is supported, using the regular SMTP mechanism. When - using a Unix socket, LMTP generally don't support or require any - authentication, but your mileage might vary.""" - - ehlo_msg = "lhlo" - - def __init__(self, host='', port=LMTP_PORT, local_hostname=None): - """Initialize a new instance.""" - SMTP.__init__(self, host, port, local_hostname) - - def connect(self, host='localhost', port=0): - """Connect to the LMTP daemon, on either a Unix or a TCP socket.""" - if host[0] != '/': - return SMTP.connect(self, host, port) - - # Handle Unix-domain sockets. 
- try: - self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - self.sock.connect(host) - except socket.error: - if self.debuglevel > 0: - print>>stderr, 'connect fail:', host - if self.sock: - self.sock.close() - self.sock = None - raise - (code, msg) = self.getreply() - if self.debuglevel > 0: - print>>stderr, "connect:", msg - return (code, msg) - - -# Test the sendmail method, which tests most of the others. -# Note: This always sends to localhost. -if __name__ == '__main__': - import sys - - def prompt(prompt): - sys.stdout.write(prompt + ": ") - return sys.stdin.readline().strip() - - fromaddr = prompt("From") - toaddrs = prompt("To").split(',') - print "Enter message, end with ^D:" - msg = '' - while 1: - line = sys.stdin.readline() - if not line: - break - msg = msg + line - print "Message length is %d" % len(msg) - - server = SMTP('localhost') - server.set_debuglevel(1) - server.sendmail(fromaddr, toaddrs, msg) - server.quit() diff --git a/python/Lib/sndhdr.py b/python/Lib/sndhdr.py deleted file mode 100755 index cc2d6b8b19..0000000000 --- a/python/Lib/sndhdr.py +++ /dev/null @@ -1,228 +0,0 @@ -"""Routines to help recognizing sound files. - -Function whathdr() recognizes various types of sound file headers. -It understands almost all headers that SOX can decode. - -The return tuple contains the following items, in this order: -- file type (as SOX understands it) -- sampling rate (0 if unknown or hard to decode) -- number of channels (0 if unknown or hard to decode) -- number of frames in the file (-1 if unknown or hard to decode) -- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW - -If the file doesn't have a recognizable type, it returns None. -If the file can't be opened, IOError is raised. - -To compute the total time, divide the number of frames by the -sampling rate (a frame contains a sample for each channel). - -Function what() calls whathdr(). (It used to also use some -heuristics for raw data, but this doesn't work very well.) 
- -Finally, the function test() is a simple main program that calls -what() for all files mentioned on the argument list. For directory -arguments it calls what() for all files in that directory. Default -argument is "." (testing all files in the current directory). The -option -r tells it to recurse down directories found inside -explicitly given directories. -""" - -# The file structure is top-down except that the test program and its -# subroutine come last. - -__all__ = ["what","whathdr"] - -def what(filename): - """Guess the type of a sound file""" - res = whathdr(filename) - return res - - -def whathdr(filename): - """Recognize sound headers""" - f = open(filename, 'rb') - h = f.read(512) - for tf in tests: - res = tf(h, f) - if res: - return res - return None - - -#-----------------------------------# -# Subroutines per sound header type # -#-----------------------------------# - -tests = [] - -def test_aifc(h, f): - import aifc - if h[:4] != 'FORM': - return None - if h[8:12] == 'AIFC': - fmt = 'aifc' - elif h[8:12] == 'AIFF': - fmt = 'aiff' - else: - return None - f.seek(0) - try: - a = aifc.openfp(f, 'r') - except (EOFError, aifc.Error): - return None - return (fmt, a.getframerate(), a.getnchannels(), \ - a.getnframes(), 8*a.getsampwidth()) - -tests.append(test_aifc) - - -def test_au(h, f): - if h[:4] == '.snd': - f = get_long_be - elif h[:4] in ('\0ds.', 'dns.'): - f = get_long_le - else: - return None - type = 'au' - hdr_size = f(h[4:8]) - data_size = f(h[8:12]) - encoding = f(h[12:16]) - rate = f(h[16:20]) - nchannels = f(h[20:24]) - sample_size = 1 # default - if encoding == 1: - sample_bits = 'U' - elif encoding == 2: - sample_bits = 8 - elif encoding == 3: - sample_bits = 16 - sample_size = 2 - else: - sample_bits = '?' 
- frame_size = sample_size * nchannels - return type, rate, nchannels, data_size//frame_size, sample_bits - -tests.append(test_au) - - -def test_hcom(h, f): - if h[65:69] != 'FSSD' or h[128:132] != 'HCOM': - return None - divisor = get_long_be(h[128+16:128+20]) - return 'hcom', 22050//divisor, 1, -1, 8 - -tests.append(test_hcom) - - -def test_voc(h, f): - if h[:20] != 'Creative Voice File\032': - return None - sbseek = get_short_le(h[20:22]) - rate = 0 - if 0 <= sbseek < 500 and h[sbseek] == '\1': - ratecode = ord(h[sbseek+4]) - rate = int(1000000.0 / (256 - ratecode)) - return 'voc', rate, 1, -1, 8 - -tests.append(test_voc) - - -def test_wav(h, f): - # 'RIFF' 'WAVE' 'fmt ' - if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ': - return None - style = get_short_le(h[20:22]) - nchannels = get_short_le(h[22:24]) - rate = get_long_le(h[24:28]) - sample_bits = get_short_le(h[34:36]) - return 'wav', rate, nchannels, -1, sample_bits - -tests.append(test_wav) - - -def test_8svx(h, f): - if h[:4] != 'FORM' or h[8:12] != '8SVX': - return None - # Should decode it to get #channels -- assume always 1 - return '8svx', 0, 1, 0, 8 - -tests.append(test_8svx) - - -def test_sndt(h, f): - if h[:5] == 'SOUND': - nsamples = get_long_le(h[8:12]) - rate = get_short_le(h[20:22]) - return 'sndt', rate, 1, nsamples, 8 - -tests.append(test_sndt) - - -def test_sndr(h, f): - if h[:2] == '\0\0': - rate = get_short_le(h[2:4]) - if 4000 <= rate <= 25000: - return 'sndr', rate, 1, -1, 8 - -tests.append(test_sndr) - - -#---------------------------------------------# -# Subroutines to extract numbers from strings # -#---------------------------------------------# - -def get_long_be(s): - return (ord(s[0])<<24) | (ord(s[1])<<16) | (ord(s[2])<<8) | ord(s[3]) - -def get_long_le(s): - return (ord(s[3])<<24) | (ord(s[2])<<16) | (ord(s[1])<<8) | ord(s[0]) - -def get_short_be(s): - return (ord(s[0])<<8) | ord(s[1]) - -def get_short_le(s): - return (ord(s[1])<<8) | ord(s[0]) - - 
-#--------------------# -# Small test program # -#--------------------# - -def test(): - import sys - recursive = 0 - if sys.argv[1:] and sys.argv[1] == '-r': - del sys.argv[1:2] - recursive = 1 - try: - if sys.argv[1:]: - testall(sys.argv[1:], recursive, 1) - else: - testall(['.'], recursive, 1) - except KeyboardInterrupt: - sys.stderr.write('\n[Interrupted]\n') - sys.exit(1) - -def testall(list, recursive, toplevel): - import sys - import os - for filename in list: - if os.path.isdir(filename): - print filename + '/:', - if recursive or toplevel: - print 'recursing down:' - import glob - names = glob.glob(os.path.join(filename, '*')) - testall(names, recursive, 0) - else: - print '*** directory (use -r) ***' - else: - print filename + ':', - sys.stdout.flush() - try: - print what(filename) - except IOError: - print '*** not found ***' - -if __name__ == '__main__': - test() diff --git a/python/Lib/socket.py b/python/Lib/socket.py deleted file mode 100755 index 437634cc3b..0000000000 --- a/python/Lib/socket.py +++ /dev/null @@ -1,577 +0,0 @@ -# Wrapper module for _socket, providing some additional facilities -# implemented in Python. - -"""\ -This module provides socket operations and some related functions. -On Unix, it supports IP (Internet Protocol) and Unix domain sockets. -On other systems, it only supports IP. Functions specific for a -socket are available as methods of the socket object. - -Functions: - -socket() -- create a new socket object -socketpair() -- create a pair of new socket objects [*] -fromfd() -- create a socket object from an open file descriptor [*] -gethostname() -- return the current hostname -gethostbyname() -- map a hostname to its IP number -gethostbyaddr() -- map an IP number or hostname to DNS info -getservbyname() -- map a service name and a protocol name to a port number -getprotobyname() -- map a protocol name (e.g. 
'tcp') to a number -ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order -htons(), htonl() -- convert 16, 32 bit int from host to network byte order -inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format -inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89) -ssl() -- secure socket layer support (only available if configured) -socket.getdefaulttimeout() -- get the default timeout value -socket.setdefaulttimeout() -- set the default timeout value -create_connection() -- connects to an address, with an optional timeout and - optional source address. - - [*] not available on all platforms! - -Special objects: - -SocketType -- type object for socket objects -error -- exception raised for I/O errors -has_ipv6 -- boolean value indicating if IPv6 is supported - -Integer constants: - -AF_INET, AF_UNIX -- socket domains (first argument to socket() call) -SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument) - -Many other constants may be defined; these may be used in calls to -the setsockopt() and getsockopt() methods. -""" - -import _socket -from _socket import * -from functools import partial -from types import MethodType - -try: - import _ssl -except ImportError: - # no SSL support - pass -else: - def ssl(sock, keyfile=None, certfile=None): - # we do an internal import here because the ssl - # module imports the socket module - import ssl as _realssl - warnings.warn("socket.ssl() is deprecated. Use ssl.wrap_socket() instead.", - DeprecationWarning, stacklevel=2) - return _realssl.sslwrap_simple(sock, keyfile, certfile) - - # we need to import the same constants we used to... 
- from _ssl import SSLError as sslerror - from _ssl import \ - RAND_add, \ - RAND_status, \ - SSL_ERROR_ZERO_RETURN, \ - SSL_ERROR_WANT_READ, \ - SSL_ERROR_WANT_WRITE, \ - SSL_ERROR_WANT_X509_LOOKUP, \ - SSL_ERROR_SYSCALL, \ - SSL_ERROR_SSL, \ - SSL_ERROR_WANT_CONNECT, \ - SSL_ERROR_EOF, \ - SSL_ERROR_INVALID_ERROR_CODE - try: - from _ssl import RAND_egd - except ImportError: - # LibreSSL does not provide RAND_egd - pass - -import os, sys, warnings - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -try: - import errno -except ImportError: - errno = None -EBADF = getattr(errno, 'EBADF', 9) -EINTR = getattr(errno, 'EINTR', 4) - -__all__ = ["getfqdn", "create_connection"] -__all__.extend(os._get_exports_list(_socket)) - - -_realsocket = socket - -# WSA error codes -if sys.platform.lower().startswith("win"): - errorTab = {} - errorTab[10004] = "The operation was interrupted." - errorTab[10009] = "A bad file handle was passed." - errorTab[10013] = "Permission denied." - errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT - errorTab[10022] = "An invalid operation was attempted." - errorTab[10035] = "The socket operation would block" - errorTab[10036] = "A blocking operation is already in progress." - errorTab[10048] = "The network address is in use." - errorTab[10054] = "The connection has been reset." - errorTab[10058] = "The network has been shut down." - errorTab[10060] = "The operation timed out." - errorTab[10061] = "Connection refused." - errorTab[10063] = "The name is too long." - errorTab[10064] = "The host is down." - errorTab[10065] = "The host is unreachable." - __all__.append("errorTab") - - - -def getfqdn(name=''): - """Get fully qualified domain name from name. - - An empty argument is interpreted as meaning the local host. - - First the hostname returned by gethostbyaddr() is checked, then - possibly existing aliases. In case no FQDN is available, hostname - from gethostname() is returned. 
- """ - name = name.strip() - if not name or name == '0.0.0.0': - name = gethostname() - try: - hostname, aliases, ipaddrs = gethostbyaddr(name) - except error: - pass - else: - aliases.insert(0, hostname) - for name in aliases: - if '.' in name: - break - else: - name = hostname - return name - - -_socketmethods = ( - 'bind', 'connect', 'connect_ex', 'fileno', 'listen', - 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', - 'sendall', 'setblocking', - 'settimeout', 'gettimeout', 'shutdown') - -if os.name == "nt": - _socketmethods = _socketmethods + ('ioctl',) - -if sys.platform == "riscos": - _socketmethods = _socketmethods + ('sleeptaskw',) - -# All the method names that must be delegated to either the real socket -# object or the _closedsocket object. -_delegate_methods = ("recv", "recvfrom", "recv_into", "recvfrom_into", - "send", "sendto") - -class _closedsocket(object): - __slots__ = [] - def _dummy(*args): - raise error(EBADF, 'Bad file descriptor') - # All _delegate_methods must also be initialized here. - send = recv = recv_into = sendto = recvfrom = recvfrom_into = _dummy - __getattr__ = _dummy - -# Wrapper around platform socket objects. This implements -# a platform-independent dup() functionality. The -# implementation currently relies on reference counting -# to close the underlying socket object. -class _socketobject(object): - - __doc__ = _realsocket.__doc__ - - __slots__ = ["_sock", "__weakref__"] + list(_delegate_methods) - - def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, _sock=None): - if _sock is None: - _sock = _realsocket(family, type, proto) - self._sock = _sock - for method in _delegate_methods: - setattr(self, method, getattr(_sock, method)) - - def close(self, _closedsocket=_closedsocket, - _delegate_methods=_delegate_methods, setattr=setattr): - # This function should not reference any globals. See issue #808164. 
- self._sock = _closedsocket() - dummy = self._sock._dummy - for method in _delegate_methods: - setattr(self, method, dummy) - close.__doc__ = _realsocket.close.__doc__ - - def accept(self): - sock, addr = self._sock.accept() - return _socketobject(_sock=sock), addr - accept.__doc__ = _realsocket.accept.__doc__ - - def dup(self): - """dup() -> socket object - - Return a new socket object connected to the same system resource.""" - return _socketobject(_sock=self._sock) - - def makefile(self, mode='r', bufsize=-1): - """makefile([mode[, bufsize]]) -> file object - - Return a regular file object corresponding to the socket. The mode - and bufsize arguments are as for the built-in open() function.""" - return _fileobject(self._sock, mode, bufsize) - - family = property(lambda self: self._sock.family, doc="the socket family") - type = property(lambda self: self._sock.type, doc="the socket type") - proto = property(lambda self: self._sock.proto, doc="the socket protocol") - -def meth(name,self,*args): - return getattr(self._sock,name)(*args) - -for _m in _socketmethods: - p = partial(meth,_m) - p.__name__ = _m - p.__doc__ = getattr(_realsocket,_m).__doc__ - m = MethodType(p,None,_socketobject) - setattr(_socketobject,_m,m) - -socket = SocketType = _socketobject - -class _fileobject(object): - """Faux file object attached to a socket object.""" - - default_bufsize = 8192 - name = "" - - __slots__ = ["mode", "bufsize", "softspace", - # "closed" is a property, see below - "_sock", "_rbufsize", "_wbufsize", "_rbuf", "_wbuf", "_wbuf_len", - "_close"] - - def __init__(self, sock, mode='rb', bufsize=-1, close=False): - self._sock = sock - self.mode = mode # Not actually used in this version - if bufsize < 0: - bufsize = self.default_bufsize - self.bufsize = bufsize - self.softspace = False - # _rbufsize is the suggested recv buffer size. It is *strictly* - # obeyed within readline() for recv calls. 
If it is larger than - # default_bufsize it will be used for recv calls within read(). - if bufsize == 0: - self._rbufsize = 1 - elif bufsize == 1: - self._rbufsize = self.default_bufsize - else: - self._rbufsize = bufsize - self._wbufsize = bufsize - # We use StringIO for the read buffer to avoid holding a list - # of variously sized string objects which have been known to - # fragment the heap due to how they are malloc()ed and often - # realloc()ed down much smaller than their original allocation. - self._rbuf = StringIO() - self._wbuf = [] # A list of strings - self._wbuf_len = 0 - self._close = close - - def _getclosed(self): - return self._sock is None - closed = property(_getclosed, doc="True if the file is closed") - - def close(self): - try: - if self._sock: - self.flush() - finally: - if self._close: - self._sock.close() - self._sock = None - - def __del__(self): - try: - self.close() - except: - # close() may fail if __init__ didn't complete - pass - - def flush(self): - if self._wbuf: - data = "".join(self._wbuf) - self._wbuf = [] - self._wbuf_len = 0 - buffer_size = max(self._rbufsize, self.default_bufsize) - data_size = len(data) - write_offset = 0 - view = memoryview(data) - try: - while write_offset < data_size: - self._sock.sendall(view[write_offset:write_offset+buffer_size]) - write_offset += buffer_size - finally: - if write_offset < data_size: - remainder = data[write_offset:] - del view, data # explicit free - self._wbuf.append(remainder) - self._wbuf_len = len(remainder) - - def fileno(self): - return self._sock.fileno() - - def write(self, data): - data = str(data) # XXX Should really reject non-string non-buffers - if not data: - return - self._wbuf.append(data) - self._wbuf_len += len(data) - if (self._wbufsize == 0 or - (self._wbufsize == 1 and '\n' in data) or - (self._wbufsize > 1 and self._wbuf_len >= self._wbufsize)): - self.flush() - - def writelines(self, list): - # XXX We could do better here for very long lists - # XXX Should 
really reject non-string non-buffers - lines = filter(None, map(str, list)) - self._wbuf_len += sum(map(len, lines)) - self._wbuf.extend(lines) - if (self._wbufsize <= 1 or - self._wbuf_len >= self._wbufsize): - self.flush() - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. - # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(rbufsize) - except error, e: - if e.args[0] == EINTR: - continue - raise - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - try: - data = self._sock.recv(left) - except error, e: - if e.args[0] == EINTR: - continue - raise - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. 
- # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self._sock.recv - while True: - try: - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - except error, e: - # The try..except to catch EINTR was moved outside the - # recv loop to avoid the per byte overhead. - if e.args[0] == EINTR: - continue - raise - break - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except error, e: - if e.args[0] == EINTR: - continue - raise - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. 
- while True: - try: - data = self._sock.recv(self._rbufsize) - except error, e: - if e.args[0] == EINTR: - continue - raise - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - - def readlines(self, sizehint=0): - total = 0 - list = [] - while True: - line = self.readline() - if not line: - break - list.append(line) - total += len(line) - if sizehint and total >= sizehint: - break - return list - - # Iterator protocols - - def __iter__(self): - return self - - def next(self): - line = self.readline() - if not line: - raise StopIteration - return line - -_GLOBAL_DEFAULT_TIMEOUT = object() - -def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, - source_address=None): - """Connect to *address* and return the socket object. - - Convenience function. Connect to *address* (a 2-tuple ``(host, - port)``) and return the socket object. Passing the optional - *timeout* parameter will set the timeout on the socket instance - before attempting to connect. If no *timeout* is supplied, the - global default timeout setting returned by :func:`getdefaulttimeout` - is used. If *source_address* is set it must be a tuple of (host, port) - for the socket to bind as a source address before making the connection. - A host of '' or port 0 tells the OS to use the default. 
- """ - - host, port = address - err = None - for res in getaddrinfo(host, port, 0, SOCK_STREAM): - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket(af, socktype, proto) - if timeout is not _GLOBAL_DEFAULT_TIMEOUT: - sock.settimeout(timeout) - if source_address: - sock.bind(source_address) - sock.connect(sa) - return sock - - except error as _: - err = _ - if sock is not None: - sock.close() - - if err is not None: - raise err - else: - raise error("getaddrinfo returns an empty list") diff --git a/python/Lib/sqlite3/__init__.py b/python/Lib/sqlite3/__init__.py deleted file mode 100755 index 41ef2b76df..0000000000 --- a/python/Lib/sqlite3/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -#-*- coding: ISO-8859-1 -*- -# pysqlite2/__init__.py: the pysqlite2 package. -# -# Copyright (C) 2005 Gerhard Häring -# -# This file is part of pysqlite. -# -# This software is provided 'as-is', without any express or implied -# warranty. In no event will the authors be held liable for any damages -# arising from the use of this software. -# -# Permission is granted to anyone to use this software for any purpose, -# including commercial applications, and to alter it and redistribute it -# freely, subject to the following restrictions: -# -# 1. The origin of this software must not be misrepresented; you must not -# claim that you wrote the original software. If you use this software -# in a product, an acknowledgment in the product documentation would be -# appreciated but is not required. -# 2. Altered source versions must be plainly marked as such, and must not be -# misrepresented as being the original software. -# 3. This notice may not be removed or altered from any source distribution. 
- -from dbapi2 import * diff --git a/python/Lib/sqlite3/dbapi2.py b/python/Lib/sqlite3/dbapi2.py deleted file mode 100755 index 0d4dcaf620..0000000000 --- a/python/Lib/sqlite3/dbapi2.py +++ /dev/null @@ -1,90 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# pysqlite2/dbapi2.py: the DB-API 2.0 interface -# -# Copyright (C) 2004-2005 Gerhard Häring -# -# This file is part of pysqlite. -# -# This software is provided 'as-is', without any express or implied -# warranty. In no event will the authors be held liable for any damages -# arising from the use of this software. -# -# Permission is granted to anyone to use this software for any purpose, -# including commercial applications, and to alter it and redistribute it -# freely, subject to the following restrictions: -# -# 1. The origin of this software must not be misrepresented; you must not -# claim that you wrote the original software. If you use this software -# in a product, an acknowledgment in the product documentation would be -# appreciated but is not required. -# 2. Altered source versions must be plainly marked as such, and must not be -# misrepresented as being the original software. -# 3. This notice may not be removed or altered from any source distribution. 
- -import collections -import datetime -import time - -from _sqlite3 import * - -paramstyle = "qmark" - -threadsafety = 1 - -apilevel = "2.0" - -Date = datetime.date - -Time = datetime.time - -Timestamp = datetime.datetime - -def DateFromTicks(ticks): - return Date(*time.localtime(ticks)[:3]) - -def TimeFromTicks(ticks): - return Time(*time.localtime(ticks)[3:6]) - -def TimestampFromTicks(ticks): - return Timestamp(*time.localtime(ticks)[:6]) - -version_info = tuple([int(x) for x in version.split(".")]) -sqlite_version_info = tuple([int(x) for x in sqlite_version.split(".")]) - -Binary = buffer -collections.Sequence.register(Row) - -def register_adapters_and_converters(): - def adapt_date(val): - return val.isoformat() - - def adapt_datetime(val): - return val.isoformat(" ") - - def convert_date(val): - return datetime.date(*map(int, val.split("-"))) - - def convert_timestamp(val): - datepart, timepart = val.split(" ") - year, month, day = map(int, datepart.split("-")) - timepart_full = timepart.split(".") - hours, minutes, seconds = map(int, timepart_full[0].split(":")) - if len(timepart_full) == 2: - microseconds = int('{:0<6.6}'.format(timepart_full[1].decode())) - else: - microseconds = 0 - - val = datetime.datetime(year, month, day, hours, minutes, seconds, microseconds) - return val - - - register_adapter(datetime.date, adapt_date) - register_adapter(datetime.datetime, adapt_datetime) - register_converter("date", convert_date) - register_converter("timestamp", convert_timestamp) - -register_adapters_and_converters() - -# Clean up namespace - -del(register_adapters_and_converters) diff --git a/python/Lib/sqlite3/dump.py b/python/Lib/sqlite3/dump.py deleted file mode 100755 index e5c5ef226d..0000000000 --- a/python/Lib/sqlite3/dump.py +++ /dev/null @@ -1,70 +0,0 @@ -# Mimic the sqlite3 console shell's .dump command -# Author: Paul Kippes - -# Every identifier in sql is quoted based on a comment in sqlite -# documentation "SQLite adds new keywords from time to 
time when it -# takes on new features. So to prevent your code from being broken by -# future enhancements, you should normally quote any identifier that -# is an English language word, even if you do not have to." - -def _iterdump(connection): - """ - Returns an iterator to the dump of the database in an SQL text format. - - Used to produce an SQL dump of the database. Useful to save an in-memory - database for later restoration. This function should not be called - directly but instead called from the Connection method, iterdump(). - """ - - cu = connection.cursor() - yield('BEGIN TRANSACTION;') - - # sqlite_master table contains the SQL CREATE statements for the database. - q = """ - SELECT "name", "type", "sql" - FROM "sqlite_master" - WHERE "sql" NOT NULL AND - "type" == 'table' - ORDER BY "name" - """ - schema_res = cu.execute(q) - for table_name, type, sql in schema_res.fetchall(): - if table_name == 'sqlite_sequence': - yield('DELETE FROM "sqlite_sequence";') - elif table_name == 'sqlite_stat1': - yield('ANALYZE "sqlite_master";') - elif table_name.startswith('sqlite_'): - continue - # NOTE: Virtual table support not implemented - #elif sql.startswith('CREATE VIRTUAL TABLE'): - # qtable = table_name.replace("'", "''") - # yield("INSERT INTO sqlite_master(type,name,tbl_name,rootpage,sql)"\ - # "VALUES('table','{0}','{0}',0,'{1}');".format( - # qtable, - # sql.replace("''"))) - else: - yield('%s;' % sql) - - # Build the insert statement for each row of the current table - table_name_ident = table_name.replace('"', '""') - res = cu.execute('PRAGMA table_info("{0}")'.format(table_name_ident)) - column_names = [str(table_info[1]) for table_info in res.fetchall()] - q = """SELECT 'INSERT INTO "{0}" VALUES({1})' FROM "{0}";""".format( - table_name_ident, - ",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names)) - query_res = cu.execute(q) - for row in query_res: - yield("%s;" % row[0]) - - # Now when the type is 'index', 
'trigger', or 'view' - q = """ - SELECT "name", "type", "sql" - FROM "sqlite_master" - WHERE "sql" NOT NULL AND - "type" IN ('index', 'trigger', 'view') - """ - schema_res = cu.execute(q) - for name, type, sql in schema_res.fetchall(): - yield('%s;' % sql) - - yield('COMMIT;') diff --git a/python/Lib/sre.py b/python/Lib/sre.py deleted file mode 100755 index c04576bafa..0000000000 --- a/python/Lib/sre.py +++ /dev/null @@ -1,13 +0,0 @@ -"""This file is only retained for backwards compatibility. -It will be removed in the future. sre was moved to re in version 2.5. -""" - -import warnings -warnings.warn("The sre module is deprecated, please import re.", - DeprecationWarning, 2) - -from re import * -from re import __all__ - -# old pickles expect the _compile() reconstructor in this module -from re import _compile diff --git a/python/Lib/sre_compile.py b/python/Lib/sre_compile.py deleted file mode 100755 index c5a7e89d07..0000000000 --- a/python/Lib/sre_compile.py +++ /dev/null @@ -1,596 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Secret Labs' Regular Expression Engine -# -# convert template to internal format -# -# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. -# -# See the sre.py file for information on usage and redistribution. -# - -"""Internal support module for sre""" - -import _sre, sys -import sre_parse -from sre_constants import * - -assert _sre.MAGIC == MAGIC, "SRE module mismatch" - -if _sre.CODESIZE == 2: - MAXCODE = 65535 -else: - MAXCODE = 0xFFFFFFFFL - -_LITERAL_CODES = set([LITERAL, NOT_LITERAL]) -_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT]) -_SUCCESS_CODES = set([SUCCESS, FAILURE]) -_ASSERT_CODES = set([ASSERT, ASSERT_NOT]) - -# Sets of lowercase characters which have the same uppercase. 
-_equivalences = ( - # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I - (0x69, 0x131), # iı - # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S - (0x73, 0x17f), # sÅ¿ - # MICRO SIGN, GREEK SMALL LETTER MU - (0xb5, 0x3bc), # µμ - # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI - (0x345, 0x3b9, 0x1fbe), # \u0345ιι - # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL - (0x3b2, 0x3d0), # Î²Ï - # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL - (0x3b5, 0x3f5), # εϵ - # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL - (0x3b8, 0x3d1), # θϑ - # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL - (0x3ba, 0x3f0), # κϰ - # GREEK SMALL LETTER PI, GREEK PI SYMBOL - (0x3c0, 0x3d6), # πϖ - # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL - (0x3c1, 0x3f1), # Ïϱ - # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA - (0x3c2, 0x3c3), # ςσ - # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL - (0x3c6, 0x3d5), # φϕ - # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE - (0x1e61, 0x1e9b), # ṡẛ -) - -# Maps the lowercase code to lowercase codes which have the same uppercase. 
-_ignorecase_fixes = {i: tuple(j for j in t if i != j) - for t in _equivalences for i in t} - -def _compile(code, pattern, flags): - # internal: compile a (sub)pattern - emit = code.append - _len = len - LITERAL_CODES = _LITERAL_CODES - REPEATING_CODES = _REPEATING_CODES - SUCCESS_CODES = _SUCCESS_CODES - ASSERT_CODES = _ASSERT_CODES - if (flags & SRE_FLAG_IGNORECASE and - not (flags & SRE_FLAG_LOCALE) and - flags & SRE_FLAG_UNICODE): - fixes = _ignorecase_fixes - else: - fixes = None - for op, av in pattern: - if op in LITERAL_CODES: - if flags & SRE_FLAG_IGNORECASE: - lo = _sre.getlower(av, flags) - if fixes and lo in fixes: - emit(OPCODES[IN_IGNORE]) - skip = _len(code); emit(0) - if op is NOT_LITERAL: - emit(OPCODES[NEGATE]) - for k in (lo,) + fixes[lo]: - emit(OPCODES[LITERAL]) - emit(k) - emit(OPCODES[FAILURE]) - code[skip] = _len(code) - skip - else: - emit(OPCODES[OP_IGNORE[op]]) - emit(lo) - else: - emit(OPCODES[op]) - emit(av) - elif op is IN: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - def fixup(literal, flags=flags): - return _sre.getlower(literal, flags) - else: - emit(OPCODES[op]) - fixup = None - skip = _len(code); emit(0) - _compile_charset(av, flags, code, fixup, fixes) - code[skip] = _len(code) - skip - elif op is ANY: - if flags & SRE_FLAG_DOTALL: - emit(OPCODES[ANY_ALL]) - else: - emit(OPCODES[ANY]) - elif op in REPEATING_CODES: - if flags & SRE_FLAG_TEMPLATE: - raise error, "internal: unsupported template operator" - emit(OPCODES[REPEAT]) - skip = _len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - elif _simple(av) and op is not REPEAT: - if op is MAX_REPEAT: - emit(OPCODES[REPEAT_ONE]) - else: - emit(OPCODES[MIN_REPEAT_ONE]) - skip = _len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - else: - emit(OPCODES[REPEAT]) - skip = _len(code); emit(0) - 
emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - code[skip] = _len(code) - skip - if op is MAX_REPEAT: - emit(OPCODES[MAX_UNTIL]) - else: - emit(OPCODES[MIN_UNTIL]) - elif op is SUBPATTERN: - if av[0]: - emit(OPCODES[MARK]) - emit((av[0]-1)*2) - # _compile_info(code, av[1], flags) - _compile(code, av[1], flags) - if av[0]: - emit(OPCODES[MARK]) - emit((av[0]-1)*2+1) - elif op in SUCCESS_CODES: - emit(OPCODES[op]) - elif op in ASSERT_CODES: - emit(OPCODES[op]) - skip = _len(code); emit(0) - if av[0] >= 0: - emit(0) # look ahead - else: - lo, hi = av[1].getwidth() - if lo != hi: - raise error, "look-behind requires fixed-width pattern" - emit(lo) # look behind - _compile(code, av[1], flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - elif op is CALL: - emit(OPCODES[op]) - skip = _len(code); emit(0) - _compile(code, av, flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - elif op is AT: - emit(OPCODES[op]) - if flags & SRE_FLAG_MULTILINE: - av = AT_MULTILINE.get(av, av) - if flags & SRE_FLAG_LOCALE: - av = AT_LOCALE.get(av, av) - elif flags & SRE_FLAG_UNICODE: - av = AT_UNICODE.get(av, av) - emit(ATCODES[av]) - elif op is BRANCH: - emit(OPCODES[op]) - tail = [] - tailappend = tail.append - for av in av[1]: - skip = _len(code); emit(0) - # _compile_info(code, av, flags) - _compile(code, av, flags) - emit(OPCODES[JUMP]) - tailappend(_len(code)); emit(0) - code[skip] = _len(code) - skip - emit(0) # end of branch - for tail in tail: - code[tail] = _len(code) - tail - elif op is CATEGORY: - emit(OPCODES[op]) - if flags & SRE_FLAG_LOCALE: - av = CH_LOCALE[av] - elif flags & SRE_FLAG_UNICODE: - av = CH_UNICODE[av] - emit(CHCODES[av]) - elif op is GROUPREF: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - else: - emit(OPCODES[op]) - emit(av-1) - elif op is GROUPREF_EXISTS: - emit(OPCODES[op]) - emit(av[0]-1) - skipyes = _len(code); emit(0) - _compile(code, av[1], flags) - if av[2]: - emit(OPCODES[JUMP]) - skipno = 
_len(code); emit(0) - code[skipyes] = _len(code) - skipyes + 1 - _compile(code, av[2], flags) - code[skipno] = _len(code) - skipno - else: - code[skipyes] = _len(code) - skipyes + 1 - else: - raise ValueError, ("unsupported operand type", op) - -def _compile_charset(charset, flags, code, fixup=None, fixes=None): - # compile charset subprogram - emit = code.append - for op, av in _optimize_charset(charset, fixup, fixes, - flags & SRE_FLAG_UNICODE): - emit(OPCODES[op]) - if op is NEGATE: - pass - elif op is LITERAL: - emit(av) - elif op is RANGE: - emit(av[0]) - emit(av[1]) - elif op is CHARSET: - code.extend(av) - elif op is BIGCHARSET: - code.extend(av) - elif op is CATEGORY: - if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) - elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) - else: - emit(CHCODES[av]) - else: - raise error, "internal: unsupported set operator" - emit(OPCODES[FAILURE]) - -def _optimize_charset(charset, fixup, fixes, isunicode): - # internal: optimize character set - out = [] - tail = [] - charmap = bytearray(256) - for op, av in charset: - while True: - try: - if op is LITERAL: - if fixup: - i = fixup(av) - charmap[i] = 1 - if fixes and i in fixes: - for k in fixes[i]: - charmap[k] = 1 - else: - charmap[av] = 1 - elif op is RANGE: - r = range(av[0], av[1]+1) - if fixup: - r = map(fixup, r) - if fixup and fixes: - for i in r: - charmap[i] = 1 - if i in fixes: - for k in fixes[i]: - charmap[k] = 1 - else: - for i in r: - charmap[i] = 1 - elif op is NEGATE: - out.append((op, av)) - else: - tail.append((op, av)) - except IndexError: - if len(charmap) == 256: - # character set contains non-UCS1 character codes - charmap += b'\0' * 0xff00 - continue - # character set contains non-BMP character codes - if fixup and isunicode and op is RANGE: - lo, hi = av - ranges = [av] - # There are only two ranges of cased astral characters: - # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi). 
- _fixup_range(max(0x10000, lo), min(0x11fff, hi), - ranges, fixup) - for lo, hi in ranges: - if lo == hi: - tail.append((LITERAL, hi)) - else: - tail.append((RANGE, (lo, hi))) - else: - tail.append((op, av)) - break - - # compress character map - runs = [] - q = 0 - while True: - p = charmap.find(b'\1', q) - if p < 0: - break - if len(runs) >= 2: - runs = None - break - q = charmap.find(b'\0', p) - if q < 0: - runs.append((p, len(charmap))) - break - runs.append((p, q)) - if runs is not None: - # use literal/range - for p, q in runs: - if q - p == 1: - out.append((LITERAL, p)) - else: - out.append((RANGE, (p, q - 1))) - out += tail - # if the case was changed or new representation is more compact - if fixup or len(out) < len(charset): - return out - # else original character set is good enough - return charset - - # use bitmap - if len(charmap) == 256: - data = _mk_bitmap(charmap) - out.append((CHARSET, data)) - out += tail - return out - - # To represent a big charset, first a bitmap of all characters in the - # set is constructed. Then, this bitmap is sliced into chunks of 256 - # characters, duplicate chunks are eliminated, and each chunk is - # given a number. In the compiled expression, the charset is - # represented by a 32-bit word sequence, consisting of one word for - # the number of different chunks, a sequence of 256 bytes (64 words) - # of chunk numbers indexed by their original chunk position, and a - # sequence of 256-bit chunks (8 words each). - - # Compression is normally good: in a typical charset, large ranges of - # Unicode will be either completely excluded (e.g. if only cyrillic - # letters are to be matched), or completely included (e.g. if large - # subranges of Kanji match). These ranges will be represented by - # chunks of all one-bits or all zero-bits. 
- - # Matching can be also done efficiently: the more significant byte of - # the Unicode character is an index into the chunk number, and the - # less significant byte is a bit index in the chunk (just like the - # CHARSET matching). - - # In UCS-4 mode, the BIGCHARSET opcode still supports only subsets - # of the basic multilingual plane; an efficient representation - # for all of Unicode has not yet been developed. - - charmap = bytes(charmap) # should be hashable - comps = {} - mapping = bytearray(256) - block = 0 - data = bytearray() - for i in range(0, 65536, 256): - chunk = charmap[i: i + 256] - if chunk in comps: - mapping[i // 256] = comps[chunk] - else: - mapping[i // 256] = comps[chunk] = block - block += 1 - data += chunk - data = _mk_bitmap(data) - data[0:0] = [block] + _bytes_to_codes(mapping) - out.append((BIGCHARSET, data)) - out += tail - return out - -def _fixup_range(lo, hi, ranges, fixup): - for i in map(fixup, range(lo, hi+1)): - for k, (lo, hi) in enumerate(ranges): - if i < lo: - if l == lo - 1: - ranges[k] = (i, hi) - else: - ranges.insert(k, (i, i)) - break - elif i > hi: - if i == hi + 1: - ranges[k] = (lo, i) - break - else: - break - else: - ranges.append((i, i)) - -_CODEBITS = _sre.CODESIZE * 8 -_BITS_TRANS = b'0' + b'1' * 255 -def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int): - s = bytes(bits).translate(_BITS_TRANS)[::-1] - return [_int(s[i - _CODEBITS: i], 2) - for i in range(len(s), 0, -_CODEBITS)] - -def _bytes_to_codes(b): - # Convert block indices to word array - import array - if _sre.CODESIZE == 2: - code = 'H' - else: - code = 'I' - a = array.array(code, bytes(b)) - assert a.itemsize == _sre.CODESIZE - assert len(a) * a.itemsize == len(b) - return a.tolist() - -def _simple(av): - # check if av is a "simple" operator - lo, hi = av[2].getwidth() - return lo == hi == 1 and av[2][0][0] != SUBPATTERN - -def _compile_info(code, pattern, flags): - # internal: compile an info block. 
in the current version, - # this contains min/max pattern width, and an optional literal - # prefix or a character map - lo, hi = pattern.getwidth() - if lo == 0: - return # not worth it - # look for a literal prefix - prefix = [] - prefixappend = prefix.append - prefix_skip = 0 - charset = [] # not used - charsetappend = charset.append - if not (flags & SRE_FLAG_IGNORECASE): - # look for literal prefix - for op, av in pattern.data: - if op is LITERAL: - if len(prefix) == prefix_skip: - prefix_skip = prefix_skip + 1 - prefixappend(av) - elif op is SUBPATTERN and len(av[1]) == 1: - op, av = av[1][0] - if op is LITERAL: - prefixappend(av) - else: - break - else: - break - # if no prefix, look for charset prefix - if not prefix and pattern.data: - op, av = pattern.data[0] - if op is SUBPATTERN and av[1]: - op, av = av[1][0] - if op is LITERAL: - charsetappend((op, av)) - elif op is BRANCH: - c = [] - cappend = c.append - for p in av[1]: - if not p: - break - op, av = p[0] - if op is LITERAL: - cappend((op, av)) - else: - break - else: - charset = c - elif op is BRANCH: - c = [] - cappend = c.append - for p in av[1]: - if not p: - break - op, av = p[0] - if op is LITERAL: - cappend((op, av)) - else: - break - else: - charset = c - elif op is IN: - charset = av -## if prefix: -## print "*** PREFIX", prefix, prefix_skip -## if charset: -## print "*** CHARSET", charset - # add an info block - emit = code.append - emit(OPCODES[INFO]) - skip = len(code); emit(0) - # literal flag - mask = 0 - if prefix: - mask = SRE_INFO_PREFIX - if len(prefix) == prefix_skip == len(pattern.data): - mask = mask + SRE_INFO_LITERAL - elif charset: - mask = mask + SRE_INFO_CHARSET - emit(mask) - # pattern length - if lo < MAXCODE: - emit(lo) - else: - emit(MAXCODE) - prefix = prefix[:MAXCODE] - if hi < MAXCODE: - emit(hi) - else: - emit(0) - # add literal prefix - if prefix: - emit(len(prefix)) # length - emit(prefix_skip) # skip - code.extend(prefix) - # generate overlap table - table = [-1] + 
([0]*len(prefix)) - for i in xrange(len(prefix)): - table[i+1] = table[i]+1 - while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: - table[i+1] = table[table[i+1]-1]+1 - code.extend(table[1:]) # don't store first entry - elif charset: - _compile_charset(charset, flags, code) - code[skip] = len(code) - skip - -try: - unicode -except NameError: - STRING_TYPES = (type(""),) -else: - STRING_TYPES = (type(""), type(unicode(""))) - -def isstring(obj): - for tp in STRING_TYPES: - if isinstance(obj, tp): - return 1 - return 0 - -def _code(p, flags): - - flags = p.pattern.flags | flags - code = [] - - # compile info block - _compile_info(code, p, flags) - - # compile the pattern - _compile(code, p.data, flags) - - code.append(OPCODES[SUCCESS]) - - return code - -def compile(p, flags=0): - # internal: convert pattern list to internal format - - if isstring(p): - pattern = p - p = sre_parse.parse(p, flags) - else: - pattern = None - - code = _code(p, flags) - - # print code - - # XXX: get rid of this limitation! - if p.pattern.groups > 100: - raise AssertionError( - "sorry, but this version only supports 100 named groups" - ) - - # map in either direction - groupindex = p.pattern.groupdict - indexgroup = [None] * p.pattern.groups - for k, i in groupindex.items(): - indexgroup[i] = k - - return _sre.compile( - pattern, flags | p.pattern.flags, code, - p.pattern.groups-1, - groupindex, indexgroup - ) diff --git a/python/Lib/sre_constants.py b/python/Lib/sre_constants.py deleted file mode 100755 index 69224e274c..0000000000 --- a/python/Lib/sre_constants.py +++ /dev/null @@ -1,263 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# various symbols used by the regular expression engine. -# run this script to update the _sre include files! -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# See the sre.py file for information on usage and redistribution. 
-# - -"""Internal support module for sre""" - -# update when constants are added or removed - -MAGIC = 20031017 - -try: - from _sre import MAXREPEAT -except ImportError: - import _sre - MAXREPEAT = _sre.MAXREPEAT = 65535 - -# SRE standard exception (access as sre.error) -# should this really be here? - -class error(Exception): - pass - -# operators - -FAILURE = "failure" -SUCCESS = "success" - -ANY = "any" -ANY_ALL = "any_all" -ASSERT = "assert" -ASSERT_NOT = "assert_not" -AT = "at" -BIGCHARSET = "bigcharset" -BRANCH = "branch" -CALL = "call" -CATEGORY = "category" -CHARSET = "charset" -GROUPREF = "groupref" -GROUPREF_IGNORE = "groupref_ignore" -GROUPREF_EXISTS = "groupref_exists" -IN = "in" -IN_IGNORE = "in_ignore" -INFO = "info" -JUMP = "jump" -LITERAL = "literal" -LITERAL_IGNORE = "literal_ignore" -MARK = "mark" -MAX_REPEAT = "max_repeat" -MAX_UNTIL = "max_until" -MIN_REPEAT = "min_repeat" -MIN_UNTIL = "min_until" -NEGATE = "negate" -NOT_LITERAL = "not_literal" -NOT_LITERAL_IGNORE = "not_literal_ignore" -RANGE = "range" -REPEAT = "repeat" -REPEAT_ONE = "repeat_one" -SUBPATTERN = "subpattern" -MIN_REPEAT_ONE = "min_repeat_one" - -# positions -AT_BEGINNING = "at_beginning" -AT_BEGINNING_LINE = "at_beginning_line" -AT_BEGINNING_STRING = "at_beginning_string" -AT_BOUNDARY = "at_boundary" -AT_NON_BOUNDARY = "at_non_boundary" -AT_END = "at_end" -AT_END_LINE = "at_end_line" -AT_END_STRING = "at_end_string" -AT_LOC_BOUNDARY = "at_loc_boundary" -AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" -AT_UNI_BOUNDARY = "at_uni_boundary" -AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" - -# categories -CATEGORY_DIGIT = "category_digit" -CATEGORY_NOT_DIGIT = "category_not_digit" -CATEGORY_SPACE = "category_space" -CATEGORY_NOT_SPACE = "category_not_space" -CATEGORY_WORD = "category_word" -CATEGORY_NOT_WORD = "category_not_word" -CATEGORY_LINEBREAK = "category_linebreak" -CATEGORY_NOT_LINEBREAK = "category_not_linebreak" -CATEGORY_LOC_WORD = "category_loc_word" -CATEGORY_LOC_NOT_WORD = 
"category_loc_not_word" -CATEGORY_UNI_DIGIT = "category_uni_digit" -CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" -CATEGORY_UNI_SPACE = "category_uni_space" -CATEGORY_UNI_NOT_SPACE = "category_uni_not_space" -CATEGORY_UNI_WORD = "category_uni_word" -CATEGORY_UNI_NOT_WORD = "category_uni_not_word" -CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" -CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" - -OPCODES = [ - - # failure=0 success=1 (just because it looks better that way :-) - FAILURE, SUCCESS, - - ANY, ANY_ALL, - ASSERT, ASSERT_NOT, - AT, - BRANCH, - CALL, - CATEGORY, - CHARSET, BIGCHARSET, - GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, - IN, IN_IGNORE, - INFO, - JUMP, - LITERAL, LITERAL_IGNORE, - MARK, - MAX_UNTIL, - MIN_UNTIL, - NOT_LITERAL, NOT_LITERAL_IGNORE, - NEGATE, - RANGE, - REPEAT, - REPEAT_ONE, - SUBPATTERN, - MIN_REPEAT_ONE - -] - -ATCODES = [ - AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, - AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, - AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, - AT_UNI_NON_BOUNDARY -] - -CHCODES = [ - CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE, - CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD, - CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD, - CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, - CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, - CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, - CATEGORY_UNI_NOT_LINEBREAK -] - -def makedict(list): - d = {} - i = 0 - for item in list: - d[item] = i - i = i + 1 - return d - -OPCODES = makedict(OPCODES) -ATCODES = makedict(ATCODES) -CHCODES = makedict(CHCODES) - -# replacement operations for "ignore case" mode -OP_IGNORE = { - GROUPREF: GROUPREF_IGNORE, - IN: IN_IGNORE, - LITERAL: LITERAL_IGNORE, - NOT_LITERAL: NOT_LITERAL_IGNORE -} - -AT_MULTILINE = { - AT_BEGINNING: AT_BEGINNING_LINE, - AT_END: AT_END_LINE -} - -AT_LOCALE = { - AT_BOUNDARY: AT_LOC_BOUNDARY, - 
AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY -} - -AT_UNICODE = { - AT_BOUNDARY: AT_UNI_BOUNDARY, - AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY -} - -CH_LOCALE = { - CATEGORY_DIGIT: CATEGORY_DIGIT, - CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT, - CATEGORY_SPACE: CATEGORY_SPACE, - CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, - CATEGORY_WORD: CATEGORY_LOC_WORD, - CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, - CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, - CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK -} - -CH_UNICODE = { - CATEGORY_DIGIT: CATEGORY_UNI_DIGIT, - CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT, - CATEGORY_SPACE: CATEGORY_UNI_SPACE, - CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE, - CATEGORY_WORD: CATEGORY_UNI_WORD, - CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, - CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, - CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK -} - -# flags -SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking) -SRE_FLAG_IGNORECASE = 2 # case insensitive -SRE_FLAG_LOCALE = 4 # honour system locale -SRE_FLAG_MULTILINE = 8 # treat target as multiline string -SRE_FLAG_DOTALL = 16 # treat target as a single string -SRE_FLAG_UNICODE = 32 # use unicode locale -SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments -SRE_FLAG_DEBUG = 128 # debugging - -# flags for INFO primitive -SRE_INFO_PREFIX = 1 # has prefix -SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) -SRE_INFO_CHARSET = 4 # pattern starts with character from given set - -if __name__ == "__main__": - def dump(f, d, prefix): - items = d.items() - items.sort(key=lambda a: a[1]) - for k, v in items: - f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) - f = open("sre_constants.h", "w") - f.write("""\ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * NOTE: This file is generated by sre_constants.py. If you need - * to change anything in here, edit sre_constants.py and run it. - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 
- * - * See the _sre.c file for information on usage and redistribution. - */ - -""") - - f.write("#define SRE_MAGIC %d\n" % MAGIC) - - dump(f, OPCODES, "SRE_OP") - dump(f, ATCODES, "SRE") - dump(f, CHCODES, "SRE") - - f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE) - f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE) - f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE) - f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE) - f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL) - f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE) - f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE) - - f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX) - f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL) - f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET) - - f.close() - print "done" diff --git a/python/Lib/sre_parse.py b/python/Lib/sre_parse.py deleted file mode 100755 index 75f488b547..0000000000 --- a/python/Lib/sre_parse.py +++ /dev/null @@ -1,834 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# convert re-style regular expression to sre pattern -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# See the sre.py file for information on usage and redistribution. 
-# - -"""Internal support module for sre""" - -# XXX: show string offset and offending character for all errors - -import sys - -from sre_constants import * - -SPECIAL_CHARS = ".\\[{()*+?^$|" -REPEAT_CHARS = "*+?{" - -DIGITS = set("0123456789") - -OCTDIGITS = set("01234567") -HEXDIGITS = set("0123456789abcdefABCDEF") - -WHITESPACE = set(" \t\n\r\v\f") - -ESCAPES = { - r"\a": (LITERAL, ord("\a")), - r"\b": (LITERAL, ord("\b")), - r"\f": (LITERAL, ord("\f")), - r"\n": (LITERAL, ord("\n")), - r"\r": (LITERAL, ord("\r")), - r"\t": (LITERAL, ord("\t")), - r"\v": (LITERAL, ord("\v")), - r"\\": (LITERAL, ord("\\")) -} - -CATEGORIES = { - r"\A": (AT, AT_BEGINNING_STRING), # start of string - r"\b": (AT, AT_BOUNDARY), - r"\B": (AT, AT_NON_BOUNDARY), - r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), - r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), - r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), - r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), - r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), - r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), - r"\Z": (AT, AT_END_STRING), # end of string -} - -FLAGS = { - # standard flags - "i": SRE_FLAG_IGNORECASE, - "L": SRE_FLAG_LOCALE, - "m": SRE_FLAG_MULTILINE, - "s": SRE_FLAG_DOTALL, - "x": SRE_FLAG_VERBOSE, - # extensions - "t": SRE_FLAG_TEMPLATE, - "u": SRE_FLAG_UNICODE, -} - -class Pattern: - # master pattern object. 
keeps track of global attributes - def __init__(self): - self.flags = 0 - self.open = [] - self.groups = 1 - self.groupdict = {} - self.lookbehind = 0 - - def opengroup(self, name=None): - gid = self.groups - self.groups = gid + 1 - if name is not None: - ogid = self.groupdict.get(name, None) - if ogid is not None: - raise error, ("redefinition of group name %s as group %d; " - "was group %d" % (repr(name), gid, ogid)) - self.groupdict[name] = gid - self.open.append(gid) - return gid - def closegroup(self, gid): - self.open.remove(gid) - def checkgroup(self, gid): - return gid < self.groups and gid not in self.open - -class SubPattern: - # a subpattern, in intermediate form - def __init__(self, pattern, data=None): - self.pattern = pattern - if data is None: - data = [] - self.data = data - self.width = None - def dump(self, level=0): - seqtypes = (tuple, list) - for op, av in self.data: - print level*" " + op, - if op == IN: - # member sublanguage - print - for op, a in av: - print (level+1)*" " + op, a - elif op == BRANCH: - print - for i, a in enumerate(av[1]): - if i: - print level*" " + "or" - a.dump(level+1) - elif op == GROUPREF_EXISTS: - condgroup, item_yes, item_no = av - print condgroup - item_yes.dump(level+1) - if item_no: - print level*" " + "else" - item_no.dump(level+1) - elif isinstance(av, seqtypes): - nl = 0 - for a in av: - if isinstance(a, SubPattern): - if not nl: - print - a.dump(level+1) - nl = 1 - else: - print a, - nl = 0 - if not nl: - print - else: - print av - def __repr__(self): - return repr(self.data) - def __len__(self): - return len(self.data) - def __delitem__(self, index): - del self.data[index] - def __getitem__(self, index): - if isinstance(index, slice): - return SubPattern(self.pattern, self.data[index]) - return self.data[index] - def __setitem__(self, index, code): - self.data[index] = code - def insert(self, index, code): - self.data.insert(index, code) - def append(self, code): - self.data.append(code) - def 
getwidth(self): - # determine the width (min, max) for this subpattern - if self.width: - return self.width - lo = hi = 0 - UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY) - REPEATCODES = (MIN_REPEAT, MAX_REPEAT) - for op, av in self.data: - if op is BRANCH: - i = MAXREPEAT - 1 - j = 0 - for av in av[1]: - l, h = av.getwidth() - i = min(i, l) - j = max(j, h) - lo = lo + i - hi = hi + j - elif op is CALL: - i, j = av.getwidth() - lo = lo + i - hi = hi + j - elif op is SUBPATTERN: - i, j = av[1].getwidth() - lo = lo + i - hi = hi + j - elif op in REPEATCODES: - i, j = av[2].getwidth() - lo = lo + i * av[0] - hi = hi + j * av[1] - elif op in UNITCODES: - lo = lo + 1 - hi = hi + 1 - elif op == SUCCESS: - break - self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) - return self.width - -class Tokenizer: - def __init__(self, string): - self.string = string - self.index = 0 - self.__next() - def __next(self): - if self.index >= len(self.string): - self.next = None - return - char = self.string[self.index] - if char[0] == "\\": - try: - c = self.string[self.index + 1] - except IndexError: - raise error, "bogus escape (end of line)" - char = char + c - self.index = self.index + len(char) - self.next = char - def match(self, char, skip=1): - if char == self.next: - if skip: - self.__next() - return 1 - return 0 - def get(self): - this = self.next - self.__next() - return this - def tell(self): - return self.index, self.next - def seek(self, index): - self.index, self.next = index - -def isident(char): - return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" - -def isdigit(char): - return "0" <= char <= "9" - -def isname(name): - # check that group name is a valid string - if not isident(name[0]): - return False - for char in name[1:]: - if not isident(char) and not isdigit(char): - return False - return True - -def _class_escape(source, escape): - # handle escape code inside character class - code = ESCAPES.get(escape) - if code: - return code - code 
= CATEGORIES.get(escape) - if code and code[0] == IN: - return code - try: - c = escape[1:2] - if c == "x": - # hexadecimal escape (exactly two digits) - while source.next in HEXDIGITS and len(escape) < 4: - escape = escape + source.get() - escape = escape[2:] - if len(escape) != 2: - raise error, "bogus escape: %s" % repr("\\" + escape) - return LITERAL, int(escape, 16) & 0xff - elif c in OCTDIGITS: - # octal escape (up to three digits) - while source.next in OCTDIGITS and len(escape) < 4: - escape = escape + source.get() - escape = escape[1:] - return LITERAL, int(escape, 8) & 0xff - elif c in DIGITS: - raise error, "bogus escape: %s" % repr(escape) - if len(escape) == 2: - return LITERAL, ord(escape[1]) - except ValueError: - pass - raise error, "bogus escape: %s" % repr(escape) - -def _escape(source, escape, state): - # handle escape code in expression - code = CATEGORIES.get(escape) - if code: - return code - code = ESCAPES.get(escape) - if code: - return code - try: - c = escape[1:2] - if c == "x": - # hexadecimal escape - while source.next in HEXDIGITS and len(escape) < 4: - escape = escape + source.get() - if len(escape) != 4: - raise ValueError - return LITERAL, int(escape[2:], 16) & 0xff - elif c == "0": - # octal escape - while source.next in OCTDIGITS and len(escape) < 4: - escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff - elif c in DIGITS: - # octal escape *or* decimal group reference (sigh) - if source.next in DIGITS: - escape = escape + source.get() - if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and - source.next in OCTDIGITS): - # got three octal digits; this is an octal escape - escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff - # not an octal escape, so this is a group reference - group = int(escape[1:]) - if group < state.groups: - if not state.checkgroup(group): - raise error, "cannot refer to open group" - if state.lookbehind: - import warnings - warnings.warn('group references in 
lookbehind ' - 'assertions are not supported', - RuntimeWarning) - return GROUPREF, group - raise ValueError - if len(escape) == 2: - return LITERAL, ord(escape[1]) - except ValueError: - pass - raise error, "bogus escape: %s" % repr(escape) - -def _parse_sub(source, state, nested=1): - # parse an alternation: a|b|c - - items = [] - itemsappend = items.append - sourcematch = source.match - while 1: - itemsappend(_parse(source, state)) - if sourcematch("|"): - continue - if not nested: - break - if not source.next or sourcematch(")", 0): - break - else: - raise error, "pattern not properly closed" - - if len(items) == 1: - return items[0] - - subpattern = SubPattern(state) - subpatternappend = subpattern.append - - # check if all items share a common prefix - while 1: - prefix = None - for item in items: - if not item: - break - if prefix is None: - prefix = item[0] - elif item[0] != prefix: - break - else: - # all subitems start with a common "prefix". - # move it out of the branch - for item in items: - del item[0] - subpatternappend(prefix) - continue # check next one - break - - # check if the branch can be replaced by a character set - for item in items: - if len(item) != 1 or item[0][0] != LITERAL: - break - else: - # we can store this as a character set instead of a - # branch (the compiler may optimize this even more) - set = [] - setappend = set.append - for item in items: - setappend(item[0]) - subpatternappend((IN, set)) - return subpattern - - subpattern.append((BRANCH, (None, items))) - return subpattern - -def _parse_sub_cond(source, state, condgroup): - item_yes = _parse(source, state) - if source.match("|"): - item_no = _parse(source, state) - if source.match("|"): - raise error, "conditional backref with more than two branches" - else: - item_no = None - if source.next and not source.match(")", 0): - raise error, "pattern not properly closed" - subpattern = SubPattern(state) - subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) - 
return subpattern - -_PATTERNENDERS = set("|)") -_ASSERTCHARS = set("=!<") -_LOOKBEHINDASSERTCHARS = set("=!") -_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT]) - -def _parse(source, state): - # parse a simple pattern - subpattern = SubPattern(state) - - # precompute constants into local variables - subpatternappend = subpattern.append - sourceget = source.get - sourcematch = source.match - _len = len - PATTERNENDERS = _PATTERNENDERS - ASSERTCHARS = _ASSERTCHARS - LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS - REPEATCODES = _REPEATCODES - - while 1: - - if source.next in PATTERNENDERS: - break # end of subpattern - this = sourceget() - if this is None: - break # end of pattern - - if state.flags & SRE_FLAG_VERBOSE: - # skip whitespace and comments - if this in WHITESPACE: - continue - if this == "#": - while 1: - this = sourceget() - if this in (None, "\n"): - break - continue - - if this and this[0] not in SPECIAL_CHARS: - subpatternappend((LITERAL, ord(this))) - - elif this == "[": - # character set - set = [] - setappend = set.append -## if sourcematch(":"): -## pass # handle character classes - if sourcematch("^"): - setappend((NEGATE, None)) - # check remaining characters - start = set[:] - while 1: - this = sourceget() - if this == "]" and set != start: - break - elif this and this[0] == "\\": - code1 = _class_escape(source, this) - elif this: - code1 = LITERAL, ord(this) - else: - raise error, "unexpected end of regular expression" - if sourcematch("-"): - # potential range - this = sourceget() - if this == "]": - if code1[0] is IN: - code1 = code1[1][0] - setappend(code1) - setappend((LITERAL, ord("-"))) - break - elif this: - if this[0] == "\\": - code2 = _class_escape(source, this) - else: - code2 = LITERAL, ord(this) - if code1[0] != LITERAL or code2[0] != LITERAL: - raise error, "bad character range" - lo = code1[1] - hi = code2[1] - if hi < lo: - raise error, "bad character range" - setappend((RANGE, (lo, hi))) - else: - raise error, "unexpected end of 
regular expression" - else: - if code1[0] is IN: - code1 = code1[1][0] - setappend(code1) - - # XXX: should move set optimization to compiler! - if _len(set)==1 and set[0][0] is LITERAL: - subpatternappend(set[0]) # optimization - elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: - subpatternappend((NOT_LITERAL, set[1][1])) # optimization - else: - # XXX: should add charmap optimization here - subpatternappend((IN, set)) - - elif this and this[0] in REPEAT_CHARS: - # repeat previous item - if this == "?": - min, max = 0, 1 - elif this == "*": - min, max = 0, MAXREPEAT - - elif this == "+": - min, max = 1, MAXREPEAT - elif this == "{": - if source.next == "}": - subpatternappend((LITERAL, ord(this))) - continue - here = source.tell() - min, max = 0, MAXREPEAT - lo = hi = "" - while source.next in DIGITS: - lo = lo + source.get() - if sourcematch(","): - while source.next in DIGITS: - hi = hi + sourceget() - else: - hi = lo - if not sourcematch("}"): - subpatternappend((LITERAL, ord(this))) - source.seek(here) - continue - if lo: - min = int(lo) - if min >= MAXREPEAT: - raise OverflowError("the repetition number is too large") - if hi: - max = int(hi) - if max >= MAXREPEAT: - raise OverflowError("the repetition number is too large") - if max < min: - raise error("bad repeat interval") - else: - raise error, "not supported" - # figure out which item to repeat - if subpattern: - item = subpattern[-1:] - else: - item = None - if not item or (_len(item) == 1 and item[0][0] == AT): - raise error, "nothing to repeat" - if item[0][0] in REPEATCODES: - raise error, "multiple repeat" - if sourcematch("?"): - subpattern[-1] = (MIN_REPEAT, (min, max, item)) - else: - subpattern[-1] = (MAX_REPEAT, (min, max, item)) - - elif this == ".": - subpatternappend((ANY, None)) - - elif this == "(": - group = 1 - name = None - condgroup = None - if sourcematch("?"): - group = 0 - # options - if sourcematch("P"): - # python extensions - if sourcematch("<"): - # named 
group: skip forward to end of name - name = "" - while 1: - char = sourceget() - if char is None: - raise error, "unterminated name" - if char == ">": - break - name = name + char - group = 1 - if not name: - raise error("missing group name") - if not isname(name): - raise error("bad character in group name %r" % - name) - elif sourcematch("="): - # named backreference - name = "" - while 1: - char = sourceget() - if char is None: - raise error, "unterminated name" - if char == ")": - break - name = name + char - if not name: - raise error("missing group name") - if not isname(name): - raise error("bad character in backref group name " - "%r" % name) - gid = state.groupdict.get(name) - if gid is None: - msg = "unknown group name: {0!r}".format(name) - raise error(msg) - if state.lookbehind: - import warnings - warnings.warn('group references in lookbehind ' - 'assertions are not supported', - RuntimeWarning) - subpatternappend((GROUPREF, gid)) - continue - else: - char = sourceget() - if char is None: - raise error, "unexpected end of pattern" - raise error, "unknown specifier: ?P%s" % char - elif sourcematch(":"): - # non-capturing group - group = 2 - elif sourcematch("#"): - # comment - while 1: - if source.next is None or source.next == ")": - break - sourceget() - if not sourcematch(")"): - raise error, "unbalanced parenthesis" - continue - elif source.next in ASSERTCHARS: - # lookahead assertions - char = sourceget() - dir = 1 - if char == "<": - if source.next not in LOOKBEHINDASSERTCHARS: - raise error, "syntax error" - dir = -1 # lookbehind - char = sourceget() - state.lookbehind += 1 - p = _parse_sub(source, state) - if dir < 0: - state.lookbehind -= 1 - if not sourcematch(")"): - raise error, "unbalanced parenthesis" - if char == "=": - subpatternappend((ASSERT, (dir, p))) - else: - subpatternappend((ASSERT_NOT, (dir, p))) - continue - elif sourcematch("("): - # conditional backreference group - condname = "" - while 1: - char = sourceget() - if char is 
None: - raise error, "unterminated name" - if char == ")": - break - condname = condname + char - group = 2 - if not condname: - raise error("missing group name") - if isname(condname): - condgroup = state.groupdict.get(condname) - if condgroup is None: - msg = "unknown group name: {0!r}".format(condname) - raise error(msg) - else: - try: - condgroup = int(condname) - except ValueError: - raise error, "bad character in group name" - if state.lookbehind: - import warnings - warnings.warn('group references in lookbehind ' - 'assertions are not supported', - RuntimeWarning) - else: - # flags - if not source.next in FLAGS: - raise error, "unexpected end of pattern" - while source.next in FLAGS: - state.flags = state.flags | FLAGS[sourceget()] - if group: - # parse group contents - if group == 2: - # anonymous group - group = None - else: - group = state.opengroup(name) - if condgroup: - p = _parse_sub_cond(source, state, condgroup) - else: - p = _parse_sub(source, state) - if not sourcematch(")"): - raise error, "unbalanced parenthesis" - if group is not None: - state.closegroup(group) - subpatternappend((SUBPATTERN, (group, p))) - else: - while 1: - char = sourceget() - if char is None: - raise error, "unexpected end of pattern" - if char == ")": - break - raise error, "unknown extension" - - elif this == "^": - subpatternappend((AT, AT_BEGINNING)) - - elif this == "$": - subpattern.append((AT, AT_END)) - - elif this and this[0] == "\\": - code = _escape(source, this, state) - subpatternappend(code) - - else: - raise error, "parser error" - - return subpattern - -def parse(str, flags=0, pattern=None): - # parse 're' pattern into list of (opcode, argument) tuples - - source = Tokenizer(str) - - if pattern is None: - pattern = Pattern() - pattern.flags = flags - pattern.str = str - - p = _parse_sub(source, pattern, 0) - - tail = source.get() - if tail == ")": - raise error, "unbalanced parenthesis" - elif tail: - raise error, "bogus characters at end of regular 
expression" - - if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: - # the VERBOSE flag was switched on inside the pattern. to be - # on the safe side, we'll parse the whole thing again... - return parse(str, p.pattern.flags) - - if flags & SRE_FLAG_DEBUG: - p.dump() - - return p - -def parse_template(source, pattern): - # parse 're' replacement string into list of literals and - # group references - s = Tokenizer(source) - sget = s.get - p = [] - a = p.append - def literal(literal, p=p, pappend=a): - if p and p[-1][0] is LITERAL: - p[-1] = LITERAL, p[-1][1] + literal - else: - pappend((LITERAL, literal)) - sep = source[:0] - if type(sep) is type(""): - makechar = chr - else: - makechar = unichr - while 1: - this = sget() - if this is None: - break # end of replacement string - if this and this[0] == "\\": - # group - c = this[1:2] - if c == "g": - name = "" - if s.match("<"): - while 1: - char = sget() - if char is None: - raise error, "unterminated group name" - if char == ">": - break - name = name + char - if not name: - raise error, "missing group name" - try: - index = int(name) - if index < 0: - raise error, "negative group number" - except ValueError: - if not isname(name): - raise error, "bad character in group name" - try: - index = pattern.groupindex[name] - except KeyError: - msg = "unknown group name: {0!r}".format(name) - raise IndexError(msg) - a((MARK, index)) - elif c == "0": - if s.next in OCTDIGITS: - this = this + sget() - if s.next in OCTDIGITS: - this = this + sget() - literal(makechar(int(this[1:], 8) & 0xff)) - elif c in DIGITS: - isoctal = False - if s.next in DIGITS: - this = this + sget() - if (c in OCTDIGITS and this[2] in OCTDIGITS and - s.next in OCTDIGITS): - this = this + sget() - isoctal = True - literal(makechar(int(this[1:], 8) & 0xff)) - if not isoctal: - a((MARK, int(this[1:]))) - else: - try: - this = makechar(ESCAPES[this][1]) - except KeyError: - pass - literal(this) - else: - literal(this) - # convert 
template to groups and literals lists - i = 0 - groups = [] - groupsappend = groups.append - literals = [None] * len(p) - for c, s in p: - if c is MARK: - groupsappend((i, s)) - # literal[i] is already None - else: - literals[i] = s - i = i + 1 - return groups, literals - -def expand_template(template, match): - g = match.group - sep = match.string[:0] - groups, literals = template - literals = literals[:] - try: - for index, group in groups: - literals[index] = s = g(group) - if s is None: - raise error, "unmatched group" - except IndexError: - raise error, "invalid group reference" - return sep.join(literals) diff --git a/python/Lib/ssl.py b/python/Lib/ssl.py deleted file mode 100755 index e21a12a1d7..0000000000 --- a/python/Lib/ssl.py +++ /dev/null @@ -1,1047 +0,0 @@ -# Wrapper module for _ssl, providing some additional facilities -# implemented in Python. Written by Bill Janssen. - -"""This module provides some more Pythonic support for SSL. - -Object types: - - SSLSocket -- subtype of socket.socket which does SSL over the socket - -Exceptions: - - SSLError -- exception raised for I/O errors - -Functions: - - cert_time_to_seconds -- convert time string used for certificate - notBefore and notAfter functions to integer - seconds past the Epoch (the time values - returned from time.time()) - - fetch_server_certificate (HOST, PORT) -- fetch the certificate provided - by the server running on HOST at port PORT. No - validation of the certificate is performed. 
- -Integer constants: - -SSL_ERROR_ZERO_RETURN -SSL_ERROR_WANT_READ -SSL_ERROR_WANT_WRITE -SSL_ERROR_WANT_X509_LOOKUP -SSL_ERROR_SYSCALL -SSL_ERROR_SSL -SSL_ERROR_WANT_CONNECT - -SSL_ERROR_EOF -SSL_ERROR_INVALID_ERROR_CODE - -The following group define certificate requirements that one side is -allowing/requiring from the other side: - -CERT_NONE - no certificates from the other side are required (or will - be looked at if provided) -CERT_OPTIONAL - certificates are not required, but if provided will be - validated, and if validation fails, the connection will - also fail -CERT_REQUIRED - certificates are required, and will be validated, and - if validation fails, the connection will also fail - -The following constants identify various SSL protocol variants: - -PROTOCOL_SSLv2 -PROTOCOL_SSLv3 -PROTOCOL_SSLv23 -PROTOCOL_TLS -PROTOCOL_TLSv1 -PROTOCOL_TLSv1_1 -PROTOCOL_TLSv1_2 - -The following constants identify various SSL alert message descriptions as per -http://www.iana.org/assignments/tls-parameters/tls-parameters.xml#tls-parameters-6 - -ALERT_DESCRIPTION_CLOSE_NOTIFY -ALERT_DESCRIPTION_UNEXPECTED_MESSAGE -ALERT_DESCRIPTION_BAD_RECORD_MAC -ALERT_DESCRIPTION_RECORD_OVERFLOW -ALERT_DESCRIPTION_DECOMPRESSION_FAILURE -ALERT_DESCRIPTION_HANDSHAKE_FAILURE -ALERT_DESCRIPTION_BAD_CERTIFICATE -ALERT_DESCRIPTION_UNSUPPORTED_CERTIFICATE -ALERT_DESCRIPTION_CERTIFICATE_REVOKED -ALERT_DESCRIPTION_CERTIFICATE_EXPIRED -ALERT_DESCRIPTION_CERTIFICATE_UNKNOWN -ALERT_DESCRIPTION_ILLEGAL_PARAMETER -ALERT_DESCRIPTION_UNKNOWN_CA -ALERT_DESCRIPTION_ACCESS_DENIED -ALERT_DESCRIPTION_DECODE_ERROR -ALERT_DESCRIPTION_DECRYPT_ERROR -ALERT_DESCRIPTION_PROTOCOL_VERSION -ALERT_DESCRIPTION_INSUFFICIENT_SECURITY -ALERT_DESCRIPTION_INTERNAL_ERROR -ALERT_DESCRIPTION_USER_CANCELLED -ALERT_DESCRIPTION_NO_RENEGOTIATION -ALERT_DESCRIPTION_UNSUPPORTED_EXTENSION -ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE -ALERT_DESCRIPTION_UNRECOGNIZED_NAME -ALERT_DESCRIPTION_BAD_CERTIFICATE_STATUS_RESPONSE 
-ALERT_DESCRIPTION_BAD_CERTIFICATE_HASH_VALUE -ALERT_DESCRIPTION_UNKNOWN_PSK_IDENTITY -""" - -import textwrap -import re -import sys -import os -from collections import namedtuple -from contextlib import closing - -import _ssl # if we can't import it, let the error propagate - -from _ssl import OPENSSL_VERSION_NUMBER, OPENSSL_VERSION_INFO, OPENSSL_VERSION -from _ssl import _SSLContext -from _ssl import ( - SSLError, SSLZeroReturnError, SSLWantReadError, SSLWantWriteError, - SSLSyscallError, SSLEOFError, - ) -from _ssl import CERT_NONE, CERT_OPTIONAL, CERT_REQUIRED -from _ssl import txt2obj as _txt2obj, nid2obj as _nid2obj -from _ssl import RAND_status, RAND_add -try: - from _ssl import RAND_egd -except ImportError: - # LibreSSL does not provide RAND_egd - pass - -def _import_symbols(prefix): - for n in dir(_ssl): - if n.startswith(prefix): - globals()[n] = getattr(_ssl, n) - -_import_symbols('OP_') -_import_symbols('ALERT_DESCRIPTION_') -_import_symbols('SSL_ERROR_') -_import_symbols('PROTOCOL_') -_import_symbols('VERIFY_') - -from _ssl import HAS_SNI, HAS_ECDH, HAS_NPN, HAS_ALPN - -from _ssl import _OPENSSL_API_VERSION - -_PROTOCOL_NAMES = {value: name for name, value in globals().items() - if name.startswith('PROTOCOL_') - and name != 'PROTOCOL_SSLv23'} -PROTOCOL_SSLv23 = PROTOCOL_TLS - -try: - _SSLv2_IF_EXISTS = PROTOCOL_SSLv2 -except NameError: - _SSLv2_IF_EXISTS = None - -from socket import socket, _fileobject, _delegate_methods, error as socket_error -if sys.platform == "win32": - from _ssl import enum_certificates, enum_crls - -from socket import socket, AF_INET, SOCK_STREAM, create_connection -from socket import SOL_SOCKET, SO_TYPE -import base64 # for DER-to-PEM translation -import errno -import warnings - -if _ssl.HAS_TLS_UNIQUE: - CHANNEL_BINDING_TYPES = ['tls-unique'] -else: - CHANNEL_BINDING_TYPES = [] - - -# Disable weak or insecure ciphers by default -# (OpenSSL's default setting is 'DEFAULT:!aNULL:!eNULL') -# Enable a better set of ciphers by 
default -# This list has been explicitly chosen to: -# * Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE) -# * Prefer ECDHE over DHE for better performance -# * Prefer AEAD over CBC for better performance and security -# * Prefer AES-GCM over ChaCha20 because most platforms have AES-NI -# (ChaCha20 needs OpenSSL 1.1.0 or patched 1.0.2) -# * Prefer any AES-GCM and ChaCha20 over any AES-CBC for better -# performance and security -# * Then Use HIGH cipher suites as a fallback -# * Disable NULL authentication, NULL encryption, 3DES and MD5 MACs -# for security reasons -_DEFAULT_CIPHERS = ( - 'ECDH+AESGCM:ECDH+CHACHA20:DH+AESGCM:DH+CHACHA20:ECDH+AES256:DH+AES256:' - 'ECDH+AES128:DH+AES:ECDH+HIGH:DH+HIGH:RSA+AESGCM:RSA+AES:RSA+HIGH:' - '!aNULL:!eNULL:!MD5:!3DES' - ) - -# Restricted and more secure ciphers for the server side -# This list has been explicitly chosen to: -# * Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE) -# * Prefer ECDHE over DHE for better performance -# * Prefer AEAD over CBC for better performance and security -# * Prefer AES-GCM over ChaCha20 because most platforms have AES-NI -# * Prefer any AES-GCM and ChaCha20 over any AES-CBC for better -# performance and security -# * Then Use HIGH cipher suites as a fallback -# * Disable NULL authentication, NULL encryption, MD5 MACs, DSS, RC4, and -# 3DES for security reasons -_RESTRICTED_SERVER_CIPHERS = ( - 'ECDH+AESGCM:ECDH+CHACHA20:DH+AESGCM:DH+CHACHA20:ECDH+AES256:DH+AES256:' - 'ECDH+AES128:DH+AES:ECDH+HIGH:DH+HIGH:RSA+AESGCM:RSA+AES:RSA+HIGH:' - '!aNULL:!eNULL:!MD5:!DSS:!RC4:!3DES' -) - - -class CertificateError(ValueError): - pass - - -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 - - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False - - pieces = dn.split(r'.') - leftmost = pieces[0] - remainder = pieces[1:] - - wildcards = leftmost.count('*') - if wildcards > 
max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() - - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - - # add the remaining fragments, ignore any wildcards - for frag in remainder: - pats.append(re.escape(frag)) - - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. 
- """ - if not cert: - raise ValueError("empty or no certificate, match_hostname needs a " - "SSL socket or SSL context with either " - "CERT_OPTIONAL or CERT_REQUIRED") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") - - -DefaultVerifyPaths = namedtuple("DefaultVerifyPaths", - "cafile capath openssl_cafile_env openssl_cafile openssl_capath_env " - "openssl_capath") - -def get_default_verify_paths(): - """Return paths to default cafile and capath. 
- """ - parts = _ssl.get_default_verify_paths() - - # environment vars shadow paths - cafile = os.environ.get(parts[0], parts[1]) - capath = os.environ.get(parts[2], parts[3]) - - return DefaultVerifyPaths(cafile if os.path.isfile(cafile) else None, - capath if os.path.isdir(capath) else None, - *parts) - - -class _ASN1Object(namedtuple("_ASN1Object", "nid shortname longname oid")): - """ASN.1 object identifier lookup - """ - __slots__ = () - - def __new__(cls, oid): - return super(_ASN1Object, cls).__new__(cls, *_txt2obj(oid, name=False)) - - @classmethod - def fromnid(cls, nid): - """Create _ASN1Object from OpenSSL numeric ID - """ - return super(_ASN1Object, cls).__new__(cls, *_nid2obj(nid)) - - @classmethod - def fromname(cls, name): - """Create _ASN1Object from short name, long name or OID - """ - return super(_ASN1Object, cls).__new__(cls, *_txt2obj(name, name=True)) - - -class Purpose(_ASN1Object): - """SSLContext purpose flags with X509v3 Extended Key Usage objects - """ - -Purpose.SERVER_AUTH = Purpose('1.3.6.1.5.5.7.3.1') -Purpose.CLIENT_AUTH = Purpose('1.3.6.1.5.5.7.3.2') - - -class SSLContext(_SSLContext): - """An SSLContext holds various SSL-related configuration options and - data, such as certificates and possibly a private key.""" - - __slots__ = ('protocol', '__weakref__') - _windows_cert_stores = ("CA", "ROOT") - - def __new__(cls, protocol, *args, **kwargs): - self = _SSLContext.__new__(cls, protocol) - if protocol != _SSLv2_IF_EXISTS: - self.set_ciphers(_DEFAULT_CIPHERS) - return self - - def __init__(self, protocol): - self.protocol = protocol - - def wrap_socket(self, sock, server_side=False, - do_handshake_on_connect=True, - suppress_ragged_eofs=True, - server_hostname=None): - return SSLSocket(sock=sock, server_side=server_side, - do_handshake_on_connect=do_handshake_on_connect, - suppress_ragged_eofs=suppress_ragged_eofs, - server_hostname=server_hostname, - _context=self) - - def set_npn_protocols(self, npn_protocols): - protos = 
bytearray() - for protocol in npn_protocols: - b = protocol.encode('ascii') - if len(b) == 0 or len(b) > 255: - raise SSLError('NPN protocols must be 1 to 255 in length') - protos.append(len(b)) - protos.extend(b) - - self._set_npn_protocols(protos) - - def set_alpn_protocols(self, alpn_protocols): - protos = bytearray() - for protocol in alpn_protocols: - b = protocol.encode('ascii') - if len(b) == 0 or len(b) > 255: - raise SSLError('ALPN protocols must be 1 to 255 in length') - protos.append(len(b)) - protos.extend(b) - - self._set_alpn_protocols(protos) - - def _load_windows_store_certs(self, storename, purpose): - certs = bytearray() - try: - for cert, encoding, trust in enum_certificates(storename): - # CA certs are never PKCS#7 encoded - if encoding == "x509_asn": - if trust is True or purpose.oid in trust: - certs.extend(cert) - except OSError: - warnings.warn("unable to enumerate Windows certificate store") - if certs: - self.load_verify_locations(cadata=certs) - return certs - - def load_default_certs(self, purpose=Purpose.SERVER_AUTH): - if not isinstance(purpose, _ASN1Object): - raise TypeError(purpose) - if sys.platform == "win32": - for storename in self._windows_cert_stores: - self._load_windows_store_certs(storename, purpose) - self.set_default_verify_paths() - - -def create_default_context(purpose=Purpose.SERVER_AUTH, cafile=None, - capath=None, cadata=None): - """Create a SSLContext object with default settings. - - NOTE: The protocol and settings may change anytime without prior - deprecation. The values represent a fair balance between maximum - compatibility and security. - """ - if not isinstance(purpose, _ASN1Object): - raise TypeError(purpose) - - context = SSLContext(PROTOCOL_TLS) - - # SSLv2 considered harmful. 
- context.options |= OP_NO_SSLv2 - - # SSLv3 has problematic security and is only required for really old - # clients such as IE6 on Windows XP - context.options |= OP_NO_SSLv3 - - # disable compression to prevent CRIME attacks (OpenSSL 1.0+) - context.options |= getattr(_ssl, "OP_NO_COMPRESSION", 0) - - if purpose == Purpose.SERVER_AUTH: - # verify certs and host name in client mode - context.verify_mode = CERT_REQUIRED - context.check_hostname = True - elif purpose == Purpose.CLIENT_AUTH: - # Prefer the server's ciphers by default so that we get stronger - # encryption - context.options |= getattr(_ssl, "OP_CIPHER_SERVER_PREFERENCE", 0) - - # Use single use keys in order to improve forward secrecy - context.options |= getattr(_ssl, "OP_SINGLE_DH_USE", 0) - context.options |= getattr(_ssl, "OP_SINGLE_ECDH_USE", 0) - - # disallow ciphers with known vulnerabilities - context.set_ciphers(_RESTRICTED_SERVER_CIPHERS) - - if cafile or capath or cadata: - context.load_verify_locations(cafile, capath, cadata) - elif context.verify_mode != CERT_NONE: - # no explicit cafile, capath or cadata but the verify mode is - # CERT_OPTIONAL or CERT_REQUIRED. Let's try to load default system - # root CA certificates for the given purpose. This may fail silently. - context.load_default_certs(purpose) - return context - -def _create_unverified_context(protocol=PROTOCOL_TLS, cert_reqs=None, - check_hostname=False, purpose=Purpose.SERVER_AUTH, - certfile=None, keyfile=None, - cafile=None, capath=None, cadata=None): - """Create a SSLContext object for Python stdlib modules - - All Python stdlib modules shall use this function to create SSLContext - objects in order to keep common settings in one place. The configuration - is less restrict than create_default_context()'s to increase backward - compatibility. - """ - if not isinstance(purpose, _ASN1Object): - raise TypeError(purpose) - - context = SSLContext(protocol) - # SSLv2 considered harmful. 
- context.options |= OP_NO_SSLv2 - # SSLv3 has problematic security and is only required for really old - # clients such as IE6 on Windows XP - context.options |= OP_NO_SSLv3 - - if cert_reqs is not None: - context.verify_mode = cert_reqs - context.check_hostname = check_hostname - - if keyfile and not certfile: - raise ValueError("certfile must be specified") - if certfile or keyfile: - context.load_cert_chain(certfile, keyfile) - - # load CA root certs - if cafile or capath or cadata: - context.load_verify_locations(cafile, capath, cadata) - elif context.verify_mode != CERT_NONE: - # no explicit cafile, capath or cadata but the verify mode is - # CERT_OPTIONAL or CERT_REQUIRED. Let's try to load default system - # root CA certificates for the given purpose. This may fail silently. - context.load_default_certs(purpose) - - return context - -# Backwards compatibility alias, even though it's not a public name. -_create_stdlib_context = _create_unverified_context - -# PEP 493: Verify HTTPS by default, but allow envvar to override that -_https_verify_envvar = 'PYTHONHTTPSVERIFY' - -def _get_https_context_factory(): - if not sys.flags.ignore_environment: - config_setting = os.environ.get(_https_verify_envvar) - if config_setting == '0': - return _create_unverified_context - return create_default_context - -_create_default_https_context = _get_https_context_factory() - -# PEP 493: "private" API to configure HTTPS defaults without monkeypatching -def _https_verify_certificates(enable=True): - """Verify server HTTPS certificates by default?""" - global _create_default_https_context - if enable: - _create_default_https_context = create_default_context - else: - _create_default_https_context = _create_unverified_context - - -class SSLSocket(socket): - """This class implements a subtype of socket.socket that wraps - the underlying OS socket in an SSL context when necessary, and - provides read and write methods over that channel.""" - - def __init__(self, sock=None, 
keyfile=None, certfile=None, - server_side=False, cert_reqs=CERT_NONE, - ssl_version=PROTOCOL_TLS, ca_certs=None, - do_handshake_on_connect=True, - family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None, - suppress_ragged_eofs=True, npn_protocols=None, ciphers=None, - server_hostname=None, - _context=None): - - self._makefile_refs = 0 - if _context: - self._context = _context - else: - if server_side and not certfile: - raise ValueError("certfile must be specified for server-side " - "operations") - if keyfile and not certfile: - raise ValueError("certfile must be specified") - if certfile and not keyfile: - keyfile = certfile - self._context = SSLContext(ssl_version) - self._context.verify_mode = cert_reqs - if ca_certs: - self._context.load_verify_locations(ca_certs) - if certfile: - self._context.load_cert_chain(certfile, keyfile) - if npn_protocols: - self._context.set_npn_protocols(npn_protocols) - if ciphers: - self._context.set_ciphers(ciphers) - self.keyfile = keyfile - self.certfile = certfile - self.cert_reqs = cert_reqs - self.ssl_version = ssl_version - self.ca_certs = ca_certs - self.ciphers = ciphers - # Can't use sock.type as other flags (such as SOCK_NONBLOCK) get - # mixed in. - if sock.getsockopt(SOL_SOCKET, SO_TYPE) != SOCK_STREAM: - raise NotImplementedError("only stream sockets are supported") - socket.__init__(self, _sock=sock._sock) - # The initializer for socket overrides the methods send(), recv(), etc. - # in the instancce, which we don't need -- but we want to provide the - # methods defined in SSLSocket. 
- for attr in _delegate_methods: - try: - delattr(self, attr) - except AttributeError: - pass - if server_side and server_hostname: - raise ValueError("server_hostname can only be specified " - "in client mode") - if self._context.check_hostname and not server_hostname: - raise ValueError("check_hostname requires server_hostname") - self.server_side = server_side - self.server_hostname = server_hostname - self.do_handshake_on_connect = do_handshake_on_connect - self.suppress_ragged_eofs = suppress_ragged_eofs - - # See if we are connected - try: - self.getpeername() - except socket_error as e: - if e.errno != errno.ENOTCONN: - raise - connected = False - else: - connected = True - - self._closed = False - self._sslobj = None - self._connected = connected - if connected: - # create the SSL object - try: - self._sslobj = self._context._wrap_socket(self._sock, server_side, - server_hostname, ssl_sock=self) - if do_handshake_on_connect: - timeout = self.gettimeout() - if timeout == 0.0: - # non-blocking - raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets") - self.do_handshake() - - except (OSError, ValueError): - self.close() - raise - - @property - def context(self): - return self._context - - @context.setter - def context(self, ctx): - self._context = ctx - self._sslobj.context = ctx - - def dup(self): - raise NotImplemented("Can't dup() %s instances" % - self.__class__.__name__) - - def _checkClosed(self, msg=None): - # raise an exception here if you wish to check for spurious closes - pass - - def _check_connected(self): - if not self._connected: - # getpeername() will raise ENOTCONN if the socket is really - # not connected; note that we can be connected even without - # _connected being set, e.g. if connect() first returned - # EAGAIN. - self.getpeername() - - def read(self, len=1024, buffer=None): - """Read up to LEN bytes and return them. 
- Return zero-length string on EOF.""" - - self._checkClosed() - if not self._sslobj: - raise ValueError("Read on closed or unwrapped SSL socket.") - try: - if buffer is not None: - v = self._sslobj.read(len, buffer) - else: - v = self._sslobj.read(len) - return v - except SSLError as x: - if x.args[0] == SSL_ERROR_EOF and self.suppress_ragged_eofs: - if buffer is not None: - return 0 - else: - return b'' - else: - raise - - def write(self, data): - """Write DATA to the underlying SSL channel. Returns - number of bytes of DATA actually transmitted.""" - - self._checkClosed() - if not self._sslobj: - raise ValueError("Write on closed or unwrapped SSL socket.") - return self._sslobj.write(data) - - def getpeercert(self, binary_form=False): - """Returns a formatted version of the data in the - certificate provided by the other end of the SSL channel. - Return None if no certificate was provided, {} if a - certificate was provided, but not validated.""" - - self._checkClosed() - self._check_connected() - return self._sslobj.peer_certificate(binary_form) - - def selected_npn_protocol(self): - self._checkClosed() - if not self._sslobj or not _ssl.HAS_NPN: - return None - else: - return self._sslobj.selected_npn_protocol() - - def selected_alpn_protocol(self): - self._checkClosed() - if not self._sslobj or not _ssl.HAS_ALPN: - return None - else: - return self._sslobj.selected_alpn_protocol() - - def cipher(self): - self._checkClosed() - if not self._sslobj: - return None - else: - return self._sslobj.cipher() - - def compression(self): - self._checkClosed() - if not self._sslobj: - return None - else: - return self._sslobj.compression() - - def send(self, data, flags=0): - self._checkClosed() - if self._sslobj: - if flags != 0: - raise ValueError( - "non-zero flags not allowed in calls to send() on %s" % - self.__class__) - try: - v = self._sslobj.write(data) - except SSLError as x: - if x.args[0] == SSL_ERROR_WANT_READ: - return 0 - elif x.args[0] == 
SSL_ERROR_WANT_WRITE: - return 0 - else: - raise - else: - return v - else: - return self._sock.send(data, flags) - - def sendto(self, data, flags_or_addr, addr=None): - self._checkClosed() - if self._sslobj: - raise ValueError("sendto not allowed on instances of %s" % - self.__class__) - elif addr is None: - return self._sock.sendto(data, flags_or_addr) - else: - return self._sock.sendto(data, flags_or_addr, addr) - - - def sendall(self, data, flags=0): - self._checkClosed() - if self._sslobj: - if flags != 0: - raise ValueError( - "non-zero flags not allowed in calls to sendall() on %s" % - self.__class__) - amount = len(data) - count = 0 - while (count < amount): - v = self.send(data[count:]) - count += v - return amount - else: - return socket.sendall(self, data, flags) - - def recv(self, buflen=1024, flags=0): - self._checkClosed() - if self._sslobj: - if flags != 0: - raise ValueError( - "non-zero flags not allowed in calls to recv() on %s" % - self.__class__) - return self.read(buflen) - else: - return self._sock.recv(buflen, flags) - - def recv_into(self, buffer, nbytes=None, flags=0): - self._checkClosed() - if buffer and (nbytes is None): - nbytes = len(buffer) - elif nbytes is None: - nbytes = 1024 - if self._sslobj: - if flags != 0: - raise ValueError( - "non-zero flags not allowed in calls to recv_into() on %s" % - self.__class__) - return self.read(nbytes, buffer) - else: - return self._sock.recv_into(buffer, nbytes, flags) - - def recvfrom(self, buflen=1024, flags=0): - self._checkClosed() - if self._sslobj: - raise ValueError("recvfrom not allowed on instances of %s" % - self.__class__) - else: - return self._sock.recvfrom(buflen, flags) - - def recvfrom_into(self, buffer, nbytes=None, flags=0): - self._checkClosed() - if self._sslobj: - raise ValueError("recvfrom_into not allowed on instances of %s" % - self.__class__) - else: - return self._sock.recvfrom_into(buffer, nbytes, flags) - - - def pending(self): - self._checkClosed() - if self._sslobj: 
- return self._sslobj.pending() - else: - return 0 - - def shutdown(self, how): - self._checkClosed() - self._sslobj = None - socket.shutdown(self, how) - - def close(self): - if self._makefile_refs < 1: - self._sslobj = None - socket.close(self) - else: - self._makefile_refs -= 1 - - def unwrap(self): - if self._sslobj: - s = self._sslobj.shutdown() - self._sslobj = None - return s - else: - raise ValueError("No SSL wrapper around " + str(self)) - - def _real_close(self): - self._sslobj = None - socket._real_close(self) - - def do_handshake(self, block=False): - """Perform a TLS/SSL handshake.""" - self._check_connected() - timeout = self.gettimeout() - try: - if timeout == 0.0 and block: - self.settimeout(None) - self._sslobj.do_handshake() - finally: - self.settimeout(timeout) - - if self.context.check_hostname: - if not self.server_hostname: - raise ValueError("check_hostname needs server_hostname " - "argument") - match_hostname(self.getpeercert(), self.server_hostname) - - def _real_connect(self, addr, connect_ex): - if self.server_side: - raise ValueError("can't connect in server-side mode") - # Here we assume that the socket is client-side, and not - # connected at the time of the call. We connect it, then wrap it. 
- if self._connected: - raise ValueError("attempt to connect already-connected SSLSocket!") - self._sslobj = self.context._wrap_socket(self._sock, False, self.server_hostname, ssl_sock=self) - try: - if connect_ex: - rc = socket.connect_ex(self, addr) - else: - rc = None - socket.connect(self, addr) - if not rc: - self._connected = True - if self.do_handshake_on_connect: - self.do_handshake() - return rc - except (OSError, ValueError): - self._sslobj = None - raise - - def connect(self, addr): - """Connects to remote ADDR, and then wraps the connection in - an SSL channel.""" - self._real_connect(addr, False) - - def connect_ex(self, addr): - """Connects to remote ADDR, and then wraps the connection in - an SSL channel.""" - return self._real_connect(addr, True) - - def accept(self): - """Accepts a new connection from a remote client, and returns - a tuple containing that new connection wrapped with a server-side - SSL channel, and the address of the remote client.""" - - newsock, addr = socket.accept(self) - newsock = self.context.wrap_socket(newsock, - do_handshake_on_connect=self.do_handshake_on_connect, - suppress_ragged_eofs=self.suppress_ragged_eofs, - server_side=True) - return newsock, addr - - def makefile(self, mode='r', bufsize=-1): - - """Make and return a file-like object that - works with the SSL connection. Just use the code - from the socket module.""" - - self._makefile_refs += 1 - # close=True so as to decrement the reference count when done with - # the file-like object. - return _fileobject(self, mode, bufsize, close=True) - - def get_channel_binding(self, cb_type="tls-unique"): - """Get channel binding data for current connection. Raise ValueError - if the requested `cb_type` is not supported. Return bytes of the data - or None if the data is not available (e.g. before the handshake). 
- """ - if cb_type not in CHANNEL_BINDING_TYPES: - raise ValueError("Unsupported channel binding type") - if cb_type != "tls-unique": - raise NotImplementedError( - "{0} channel binding type not implemented" - .format(cb_type)) - if self._sslobj is None: - return None - return self._sslobj.tls_unique_cb() - - def version(self): - """ - Return a string identifying the protocol version used by the - current SSL channel, or None if there is no established channel. - """ - if self._sslobj is None: - return None - return self._sslobj.version() - - -def wrap_socket(sock, keyfile=None, certfile=None, - server_side=False, cert_reqs=CERT_NONE, - ssl_version=PROTOCOL_TLS, ca_certs=None, - do_handshake_on_connect=True, - suppress_ragged_eofs=True, - ciphers=None): - - return SSLSocket(sock=sock, keyfile=keyfile, certfile=certfile, - server_side=server_side, cert_reqs=cert_reqs, - ssl_version=ssl_version, ca_certs=ca_certs, - do_handshake_on_connect=do_handshake_on_connect, - suppress_ragged_eofs=suppress_ragged_eofs, - ciphers=ciphers) - -# some utility functions - -def cert_time_to_seconds(cert_time): - """Return the time in seconds since the Epoch, given the timestring - representing the "notBefore" or "notAfter" date from a certificate - in ``"%b %d %H:%M:%S %Y %Z"`` strptime format (C locale). - - "notBefore" or "notAfter" dates must use UTC (RFC 5280). 
- - Month is one of: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec - UTC should be specified as GMT (see ASN1_TIME_print()) - """ - from time import strptime - from calendar import timegm - - months = ( - "Jan","Feb","Mar","Apr","May","Jun", - "Jul","Aug","Sep","Oct","Nov","Dec" - ) - time_format = ' %d %H:%M:%S %Y GMT' # NOTE: no month, fixed GMT - try: - month_number = months.index(cert_time[:3].title()) + 1 - except ValueError: - raise ValueError('time data %r does not match ' - 'format "%%b%s"' % (cert_time, time_format)) - else: - # found valid month - tt = strptime(cert_time[3:], time_format) - # return an integer, the previous mktime()-based implementation - # returned a float (fractional seconds are always zero here). - return timegm((tt[0], month_number) + tt[2:6]) - -PEM_HEADER = "-----BEGIN CERTIFICATE-----" -PEM_FOOTER = "-----END CERTIFICATE-----" - -def DER_cert_to_PEM_cert(der_cert_bytes): - """Takes a certificate in binary DER format and returns the - PEM version of it as a string.""" - - f = base64.standard_b64encode(der_cert_bytes).decode('ascii') - return (PEM_HEADER + '\n' + - textwrap.fill(f, 64) + '\n' + - PEM_FOOTER + '\n') - -def PEM_cert_to_DER_cert(pem_cert_string): - """Takes a certificate in ASCII PEM format and returns the - DER-encoded version of it as a byte sequence""" - - if not pem_cert_string.startswith(PEM_HEADER): - raise ValueError("Invalid PEM encoding; must start with %s" - % PEM_HEADER) - if not pem_cert_string.strip().endswith(PEM_FOOTER): - raise ValueError("Invalid PEM encoding; must end with %s" - % PEM_FOOTER) - d = pem_cert_string.strip()[len(PEM_HEADER):-len(PEM_FOOTER)] - return base64.decodestring(d.encode('ASCII', 'strict')) - -def get_server_certificate(addr, ssl_version=PROTOCOL_TLS, ca_certs=None): - """Retrieve the certificate from the server at the specified address, - and return it as a PEM-encoded string. - If 'ca_certs' is specified, validate the server cert against it. 
- If 'ssl_version' is specified, use it in the connection attempt.""" - - host, port = addr - if ca_certs is not None: - cert_reqs = CERT_REQUIRED - else: - cert_reqs = CERT_NONE - context = _create_stdlib_context(ssl_version, - cert_reqs=cert_reqs, - cafile=ca_certs) - with closing(create_connection(addr)) as sock: - with closing(context.wrap_socket(sock)) as sslsock: - dercert = sslsock.getpeercert(True) - return DER_cert_to_PEM_cert(dercert) - -def get_protocol_name(protocol_code): - return _PROTOCOL_NAMES.get(protocol_code, '') - - -# a replacement for the old socket.ssl function - -def sslwrap_simple(sock, keyfile=None, certfile=None): - """A replacement for the old socket.ssl function. Designed - for compability with Python 2.5 and earlier. Will disappear in - Python 3.0.""" - if hasattr(sock, "_sock"): - sock = sock._sock - - ctx = SSLContext(PROTOCOL_SSLv23) - if keyfile or certfile: - ctx.load_cert_chain(certfile, keyfile) - ssl_sock = ctx._wrap_socket(sock, server_side=False) - try: - sock.getpeername() - except socket_error: - # no, no connection yet - pass - else: - # yes, do the handshake - ssl_sock.do_handshake() - - return ssl_sock diff --git a/python/Lib/stat.py b/python/Lib/stat.py deleted file mode 100755 index abed5c9e0f..0000000000 --- a/python/Lib/stat.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Constants/functions for interpreting results of os.stat() and os.lstat(). 
- -Suggested usage: from stat import * -""" - -# Indices for stat struct members in the tuple returned by os.stat() - -ST_MODE = 0 -ST_INO = 1 -ST_DEV = 2 -ST_NLINK = 3 -ST_UID = 4 -ST_GID = 5 -ST_SIZE = 6 -ST_ATIME = 7 -ST_MTIME = 8 -ST_CTIME = 9 - -# Extract bits from the mode - -def S_IMODE(mode): - return mode & 07777 - -def S_IFMT(mode): - return mode & 0170000 - -# Constants used as S_IFMT() for various file types -# (not all are implemented on all systems) - -S_IFDIR = 0040000 -S_IFCHR = 0020000 -S_IFBLK = 0060000 -S_IFREG = 0100000 -S_IFIFO = 0010000 -S_IFLNK = 0120000 -S_IFSOCK = 0140000 - -# Functions to test for each file type - -def S_ISDIR(mode): - return S_IFMT(mode) == S_IFDIR - -def S_ISCHR(mode): - return S_IFMT(mode) == S_IFCHR - -def S_ISBLK(mode): - return S_IFMT(mode) == S_IFBLK - -def S_ISREG(mode): - return S_IFMT(mode) == S_IFREG - -def S_ISFIFO(mode): - return S_IFMT(mode) == S_IFIFO - -def S_ISLNK(mode): - return S_IFMT(mode) == S_IFLNK - -def S_ISSOCK(mode): - return S_IFMT(mode) == S_IFSOCK - -# Names for permission bits - -S_ISUID = 04000 -S_ISGID = 02000 -S_ENFMT = S_ISGID -S_ISVTX = 01000 -S_IREAD = 00400 -S_IWRITE = 00200 -S_IEXEC = 00100 -S_IRWXU = 00700 -S_IRUSR = 00400 -S_IWUSR = 00200 -S_IXUSR = 00100 -S_IRWXG = 00070 -S_IRGRP = 00040 -S_IWGRP = 00020 -S_IXGRP = 00010 -S_IRWXO = 00007 -S_IROTH = 00004 -S_IWOTH = 00002 -S_IXOTH = 00001 - -# Names for file flags - -UF_NODUMP = 0x00000001 -UF_IMMUTABLE = 0x00000002 -UF_APPEND = 0x00000004 -UF_OPAQUE = 0x00000008 -UF_NOUNLINK = 0x00000010 -UF_COMPRESSED = 0x00000020 # OS X: file is hfs-compressed -UF_HIDDEN = 0x00008000 # OS X: file should not be displayed -SF_ARCHIVED = 0x00010000 -SF_IMMUTABLE = 0x00020000 -SF_APPEND = 0x00040000 -SF_NOUNLINK = 0x00100000 -SF_SNAPSHOT = 0x00200000 diff --git a/python/Lib/statvfs.py b/python/Lib/statvfs.py deleted file mode 100755 index f572422e8a..0000000000 --- a/python/Lib/statvfs.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Constants for interpreting 
the results of os.statvfs() and os.fstatvfs().""" -from warnings import warnpy3k -warnpy3k("the statvfs module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -# Indices for statvfs struct members in the tuple returned by -# os.statvfs() and os.fstatvfs(). - -F_BSIZE = 0 # Preferred file system block size -F_FRSIZE = 1 # Fundamental file system block size -F_BLOCKS = 2 # Total number of file system blocks (FRSIZE) -F_BFREE = 3 # Total number of free blocks -F_BAVAIL = 4 # Free blocks available to non-superuser -F_FILES = 5 # Total number of file nodes -F_FFREE = 6 # Total number of free file nodes -F_FAVAIL = 7 # Free nodes available to non-superuser -F_FLAG = 8 # Flags (see your local statvfs man page) -F_NAMEMAX = 9 # Maximum file name length diff --git a/python/Lib/string.py b/python/Lib/string.py deleted file mode 100755 index 23608b4ace..0000000000 --- a/python/Lib/string.py +++ /dev/null @@ -1,656 +0,0 @@ -"""A collection of string operations (most are no longer used). - -Warning: most of the code you see here isn't normally used nowadays. -Beginning with Python 1.6, many of these functions are implemented as -methods on the standard string object. They used to be implemented by -a built-in module called strop, but strop is now obsolete itself. 
- -Public module variables: - -whitespace -- a string containing all characters considered whitespace -lowercase -- a string containing all characters considered lowercase letters -uppercase -- a string containing all characters considered uppercase letters -letters -- a string containing all characters considered letters -digits -- a string containing all characters considered decimal digits -hexdigits -- a string containing all characters considered hexadecimal digits -octdigits -- a string containing all characters considered octal digits -punctuation -- a string containing all characters considered punctuation -printable -- a string containing all characters considered printable - -""" - -# Some strings for ctype-style character classification -whitespace = ' \t\n\r\v\f' -lowercase = 'abcdefghijklmnopqrstuvwxyz' -uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -letters = lowercase + uppercase -ascii_lowercase = lowercase -ascii_uppercase = uppercase -ascii_letters = ascii_lowercase + ascii_uppercase -digits = '0123456789' -hexdigits = digits + 'abcdef' + 'ABCDEF' -octdigits = '01234567' -punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" -printable = digits + letters + punctuation + whitespace - -# Case conversion helpers -# Use str to convert Unicode literal in case of -U -l = map(chr, xrange(256)) -_idmap = str('').join(l) -del l - -# Functions which aren't available as string methods. - -# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". -def capwords(s, sep=None): - """capwords(s [,sep]) -> string - - Split the argument into words using split, capitalize each - word using capitalize, and join the capitalized words using - join. If the optional second argument sep is absent or None, - runs of whitespace characters are replaced by a single space - and leading and trailing whitespace are removed, otherwise - sep is used to split and join the words. 
- - """ - return (sep or ' ').join(x.capitalize() for x in s.split(sep)) - - -# Construct a translation string -_idmapL = None -def maketrans(fromstr, tostr): - """maketrans(frm, to) -> string - - Return a translation table (a string of 256 bytes long) - suitable for use in string.translate. The strings frm and to - must be of the same length. - - """ - if len(fromstr) != len(tostr): - raise ValueError, "maketrans arguments must have same length" - global _idmapL - if not _idmapL: - _idmapL = list(_idmap) - L = _idmapL[:] - fromstr = map(ord, fromstr) - for i in range(len(fromstr)): - L[fromstr[i]] = tostr[i] - return ''.join(L) - - - -#################################################################### -import re as _re - -class _multimap: - """Helper class for combining multiple mappings. - - Used by .{safe_,}substitute() to combine the mapping and keyword - arguments. - """ - def __init__(self, primary, secondary): - self._primary = primary - self._secondary = secondary - - def __getitem__(self, key): - try: - return self._primary[key] - except KeyError: - return self._secondary[key] - - -class _TemplateMetaclass(type): - pattern = r""" - %(delim)s(?: - (?P%(delim)s) | # Escape sequence of two delimiters - (?P%(id)s) | # delimiter and a Python identifier - {(?P%(id)s)} | # delimiter and a braced identifier - (?P) # Other ill-formed delimiter exprs - ) - """ - - def __init__(cls, name, bases, dct): - super(_TemplateMetaclass, cls).__init__(name, bases, dct) - if 'pattern' in dct: - pattern = cls.pattern - else: - pattern = _TemplateMetaclass.pattern % { - 'delim' : _re.escape(cls.delimiter), - 'id' : cls.idpattern, - } - cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE) - - -class Template: - """A string class for supporting $-substitutions.""" - __metaclass__ = _TemplateMetaclass - - delimiter = '$' - idpattern = r'[_a-z][_a-z0-9]*' - - def __init__(self, template): - self.template = template - - # Search for $$, $identifier, ${identifier}, and 
any bare $'s - - def _invalid(self, mo): - i = mo.start('invalid') - lines = self.template[:i].splitlines(True) - if not lines: - colno = 1 - lineno = 1 - else: - colno = i - len(''.join(lines[:-1])) - lineno = len(lines) - raise ValueError('Invalid placeholder in string: line %d, col %d' % - (lineno, colno)) - - def substitute(*args, **kws): - if not args: - raise TypeError("descriptor 'substitute' of 'Template' object " - "needs an argument") - self, args = args[0], args[1:] # allow the "self" keyword be passed - if len(args) > 1: - raise TypeError('Too many positional arguments') - if not args: - mapping = kws - elif kws: - mapping = _multimap(kws, args[0]) - else: - mapping = args[0] - # Helper function for .sub() - def convert(mo): - # Check the most common path first. - named = mo.group('named') or mo.group('braced') - if named is not None: - val = mapping[named] - # We use this idiom instead of str() because the latter will - # fail if val is a Unicode containing non-ASCII characters. 
- return '%s' % (val,) - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - self._invalid(mo) - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - - def safe_substitute(*args, **kws): - if not args: - raise TypeError("descriptor 'safe_substitute' of 'Template' object " - "needs an argument") - self, args = args[0], args[1:] # allow the "self" keyword be passed - if len(args) > 1: - raise TypeError('Too many positional arguments') - if not args: - mapping = kws - elif kws: - mapping = _multimap(kws, args[0]) - else: - mapping = args[0] - # Helper function for .sub() - def convert(mo): - named = mo.group('named') or mo.group('braced') - if named is not None: - try: - # We use this idiom instead of str() because the latter - # will fail if val is a Unicode containing non-ASCII - return '%s' % (mapping[named],) - except KeyError: - return mo.group() - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - return mo.group() - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - - - -#################################################################### -# NOTE: Everything below here is deprecated. Use string methods instead. -# This stuff will go away in Python 3.0. - -# Backward compatible names for exceptions -index_error = ValueError -atoi_error = ValueError -atof_error = ValueError -atol_error = ValueError - -# convert UPPER CASE letters to lower case -def lower(s): - """lower(s) -> string - - Return a copy of the string s converted to lowercase. - - """ - return s.lower() - -# Convert lower case letters to UPPER CASE -def upper(s): - """upper(s) -> string - - Return a copy of the string s converted to uppercase. 
- - """ - return s.upper() - -# Swap lower case letters and UPPER CASE -def swapcase(s): - """swapcase(s) -> string - - Return a copy of the string s with upper case characters - converted to lowercase and vice versa. - - """ - return s.swapcase() - -# Strip leading and trailing tabs and spaces -def strip(s, chars=None): - """strip(s [,chars]) -> string - - Return a copy of the string s with leading and trailing - whitespace removed. - If chars is given and not None, remove characters in chars instead. - If chars is unicode, S will be converted to unicode before stripping. - - """ - return s.strip(chars) - -# Strip leading tabs and spaces -def lstrip(s, chars=None): - """lstrip(s [,chars]) -> string - - Return a copy of the string s with leading whitespace removed. - If chars is given and not None, remove characters in chars instead. - - """ - return s.lstrip(chars) - -# Strip trailing tabs and spaces -def rstrip(s, chars=None): - """rstrip(s [,chars]) -> string - - Return a copy of the string s with trailing whitespace removed. - If chars is given and not None, remove characters in chars instead. - - """ - return s.rstrip(chars) - - -# Split a string into a list of space/tab-separated words -def split(s, sep=None, maxsplit=-1): - """split(s [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is given, splits at no more than - maxsplit places (resulting in at most maxsplit+1 words). If sep - is not specified or is None, any whitespace string is a separator. - - (split and splitfields are synonymous) - - """ - return s.split(sep, maxsplit) -splitfields = split - -# Split a string into a list of space/tab-separated words -def rsplit(s, sep=None, maxsplit=-1): - """rsplit(s [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string, starting at the end of the string and working - to the front. 
If maxsplit is given, at most maxsplit splits are - done. If sep is not specified or is None, any whitespace string - is a separator. - """ - return s.rsplit(sep, maxsplit) - -# Join fields with optional separator -def join(words, sep = ' '): - """join(list [,sep]) -> string - - Return a string composed of the words in list, with - intervening occurrences of sep. The default separator is a - single space. - - (joinfields and join are synonymous) - - """ - return sep.join(words) -joinfields = join - -# Find substring, raise exception if not found -def index(s, *args): - """index(s, sub [,start [,end]]) -> int - - Like find but raises ValueError when the substring is not found. - - """ - return s.index(*args) - -# Find last substring, raise exception if not found -def rindex(s, *args): - """rindex(s, sub [,start [,end]]) -> int - - Like rfind but raises ValueError when the substring is not found. - - """ - return s.rindex(*args) - -# Count non-overlapping occurrences of substring -def count(s, *args): - """count(s, sub[, start[,end]]) -> int - - Return the number of occurrences of substring sub in string - s[start:end]. Optional arguments start and end are - interpreted as in slice notation. - - """ - return s.count(*args) - -# Find substring, return -1 if not found -def find(s, *args): - """find(s, sub [,start [,end]]) -> in - - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - return s.find(*args) - -# Find last substring, return -1 if not found -def rfind(s, *args): - """rfind(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. 
- - """ - return s.rfind(*args) - -# for a bit of speed -_float = float -_int = int -_long = long - -# Convert string to float -def atof(s): - """atof(s) -> float - - Return the floating point number represented by the string s. - - """ - return _float(s) - - -# Convert string to integer -def atoi(s , base=10): - """atoi(s [,base]) -> int - - Return the integer represented by the string s in the given - base, which defaults to 10. The string s must consist of one - or more digits, possibly preceded by a sign. If base is 0, it - is chosen from the leading characters of s, 0 for octal, 0x or - 0X for hexadecimal. If base is 16, a preceding 0x or 0X is - accepted. - - """ - return _int(s, base) - - -# Convert string to long integer -def atol(s, base=10): - """atol(s [,base]) -> long - - Return the long integer represented by the string s in the - given base, which defaults to 10. The string s must consist - of one or more digits, possibly preceded by a sign. If base - is 0, it is chosen from the leading characters of s, 0 for - octal, 0x or 0X for hexadecimal. If base is 16, a preceding - 0x or 0X is accepted. A trailing L or l is not accepted, - unless base is 0. - - """ - return _long(s, base) - - -# Left-justify a string -def ljust(s, width, *args): - """ljust(s, width[, fillchar]) -> string - - Return a left-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. If specified the fillchar is used instead of spaces. - - """ - return s.ljust(width, *args) - -# Right-justify a string -def rjust(s, width, *args): - """rjust(s, width[, fillchar]) -> string - - Return a right-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. If specified the fillchar is used instead of spaces. 
- - """ - return s.rjust(width, *args) - -# Center a string -def center(s, width, *args): - """center(s, width[, fillchar]) -> string - - Return a center version of s, in a field of the specified - width. padded with spaces as needed. The string is never - truncated. If specified the fillchar is used instead of spaces. - - """ - return s.center(width, *args) - -# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' -# Decadent feature: the argument may be a string or a number -# (Use of this is deprecated; it should be a string as with ljust c.s.) -def zfill(x, width): - """zfill(x, width) -> string - - Pad a numeric string x with zeros on the left, to fill a field - of the specified width. The string x is never truncated. - - """ - if not isinstance(x, basestring): - x = repr(x) - return x.zfill(width) - -# Expand tabs in a string. -# Doesn't take non-printing chars into account, but does understand \n. -def expandtabs(s, tabsize=8): - """expandtabs(s [,tabsize]) -> string - - Return a copy of the string s with all tab characters replaced - by the appropriate number of spaces, depending on the current - column, and the tabsize (default 8). - - """ - return s.expandtabs(tabsize) - -# Character translation through look-up table. -def translate(s, table, deletions=""): - """translate(s,table [,deletions]) -> string - - Return a copy of the string s, where all characters occurring - in the optional argument deletions are removed, and the - remaining characters have been mapped through the given - translation table, which must be a string of length 256. The - deletions argument is not allowed for Unicode strings. - - """ - if deletions or table is None: - return s.translate(table, deletions) - else: - # Add s[:0] so that if s is Unicode and table is an 8-bit string, - # table is converted to Unicode. This means that table *cannot* - # be a dictionary -- for that feature, use u.translate() directly. 
- return s.translate(table + s[:0]) - -# Capitalize a string, e.g. "aBc dEf" -> "Abc def". -def capitalize(s): - """capitalize(s) -> string - - Return a copy of the string s with only its first character - capitalized. - - """ - return s.capitalize() - -# Substring replacement (global) -def replace(s, old, new, maxreplace=-1): - """replace (str, old, new[, maxreplace]) -> string - - Return a copy of string str with all occurrences of substring - old replaced by new. If the optional argument maxreplace is - given, only the first maxreplace occurrences are replaced. - - """ - return s.replace(old, new, maxreplace) - - -# Try importing optional built-in module "strop" -- if it exists, -# it redefines some string operations that are 100-1000 times faster. -# It also defines values for whitespace, lowercase and uppercase -# that match 's definitions. - -try: - from strop import maketrans, lowercase, uppercase, whitespace - letters = lowercase + uppercase -except ImportError: - pass # Use the original versions - -######################################################################## -# the Formatter class -# see PEP 3101 for details and purpose of this class - -# The hard parts are reused from the C implementation. They're exposed as "_" -# prefixed methods of str and unicode. - -# The overall parser is implemented in str._formatter_parser. 
-# The field name parser is implemented in str._formatter_field_name_split - -class Formatter(object): - def format(*args, **kwargs): - if not args: - raise TypeError("descriptor 'format' of 'Formatter' object " - "needs an argument") - self, args = args[0], args[1:] # allow the "self" keyword be passed - try: - format_string, args = args[0], args[1:] # allow the "format_string" keyword be passed - except IndexError: - if 'format_string' in kwargs: - format_string = kwargs.pop('format_string') - else: - raise TypeError("format() missing 1 required positional " - "argument: 'format_string'") - return self.vformat(format_string, args, kwargs) - - def vformat(self, format_string, args, kwargs): - used_args = set() - result = self._vformat(format_string, args, kwargs, used_args, 2) - self.check_unused_args(used_args, args, kwargs) - return result - - def _vformat(self, format_string, args, kwargs, used_args, recursion_depth): - if recursion_depth < 0: - raise ValueError('Max string recursion exceeded') - result = [] - for literal_text, field_name, format_spec, conversion in \ - self.parse(format_string): - - # output the literal text - if literal_text: - result.append(literal_text) - - # if there's a field, output it - if field_name is not None: - # this is some markup, find the object and do - # the formatting - - # given the field_name, find the object it references - # and the argument it came from - obj, arg_used = self.get_field(field_name, args, kwargs) - used_args.add(arg_used) - - # do any conversion on the resulting object - obj = self.convert_field(obj, conversion) - - # expand the format spec, if needed - format_spec = self._vformat(format_spec, args, kwargs, - used_args, recursion_depth-1) - - # format the object and append to the result - result.append(self.format_field(obj, format_spec)) - - return ''.join(result) - - - def get_value(self, key, args, kwargs): - if isinstance(key, (int, long)): - return args[key] - else: - return kwargs[key] - - - def 
check_unused_args(self, used_args, args, kwargs): - pass - - - def format_field(self, value, format_spec): - return format(value, format_spec) - - - def convert_field(self, value, conversion): - # do any conversion on the resulting object - if conversion is None: - return value - elif conversion == 's': - return str(value) - elif conversion == 'r': - return repr(value) - raise ValueError("Unknown conversion specifier {0!s}".format(conversion)) - - - # returns an iterable that contains tuples of the form: - # (literal_text, field_name, format_spec, conversion) - # literal_text can be zero length - # field_name can be None, in which case there's no - # object to format and output - # if field_name is not None, it is looked up, formatted - # with format_spec and conversion and then used - def parse(self, format_string): - return format_string._formatter_parser() - - - # given a field_name, find the object it references. - # field_name: the field being looked up, e.g. "0.name" - # or "lookup[3]" - # used_args: a set of which args have been used - # args, kwargs: as passed in to vformat - def get_field(self, field_name, args, kwargs): - first, rest = field_name._formatter_field_name_split() - - obj = self.get_value(first, args, kwargs) - - # loop through the rest of the field_name, doing - # getattr or getitem as needed - for is_attr, i in rest: - if is_attr: - obj = getattr(obj, i) - else: - obj = obj[i] - - return obj, first diff --git a/python/Lib/stringold.py b/python/Lib/stringold.py deleted file mode 100755 index ebfe38313a..0000000000 --- a/python/Lib/stringold.py +++ /dev/null @@ -1,432 +0,0 @@ -# module 'string' -- A collection of string operations - -# Warning: most of the code you see here isn't normally used nowadays. With -# Python 1.6, many of these functions are implemented as methods on the -# standard string object. They used to be implemented by a built-in module -# called strop, but strop is now obsolete itself. - -"""Common string manipulations. 
- -Public module variables: - -whitespace -- a string containing all characters considered whitespace -lowercase -- a string containing all characters considered lowercase letters -uppercase -- a string containing all characters considered uppercase letters -letters -- a string containing all characters considered letters -digits -- a string containing all characters considered decimal digits -hexdigits -- a string containing all characters considered hexadecimal digits -octdigits -- a string containing all characters considered octal digits - -""" -from warnings import warnpy3k -warnpy3k("the stringold module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -# Some strings for ctype-style character classification -whitespace = ' \t\n\r\v\f' -lowercase = 'abcdefghijklmnopqrstuvwxyz' -uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -letters = lowercase + uppercase -digits = '0123456789' -hexdigits = digits + 'abcdef' + 'ABCDEF' -octdigits = '01234567' - -# Case conversion helpers -_idmap = '' -for i in range(256): _idmap = _idmap + chr(i) -del i - -# Backward compatible names for exceptions -index_error = ValueError -atoi_error = ValueError -atof_error = ValueError -atol_error = ValueError - -# convert UPPER CASE letters to lower case -def lower(s): - """lower(s) -> string - - Return a copy of the string s converted to lowercase. - - """ - return s.lower() - -# Convert lower case letters to UPPER CASE -def upper(s): - """upper(s) -> string - - Return a copy of the string s converted to uppercase. - - """ - return s.upper() - -# Swap lower case letters and UPPER CASE -def swapcase(s): - """swapcase(s) -> string - - Return a copy of the string s with upper case characters - converted to lowercase and vice versa. - - """ - return s.swapcase() - -# Strip leading and trailing tabs and spaces -def strip(s): - """strip(s) -> string - - Return a copy of the string s with leading and trailing - whitespace removed. 
- - """ - return s.strip() - -# Strip leading tabs and spaces -def lstrip(s): - """lstrip(s) -> string - - Return a copy of the string s with leading whitespace removed. - - """ - return s.lstrip() - -# Strip trailing tabs and spaces -def rstrip(s): - """rstrip(s) -> string - - Return a copy of the string s with trailing whitespace - removed. - - """ - return s.rstrip() - - -# Split a string into a list of space/tab-separated words -def split(s, sep=None, maxsplit=0): - """split(str [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is nonzero, splits into at most - maxsplit words If sep is not specified, any whitespace string - is a separator. Maxsplit defaults to 0. - - (split and splitfields are synonymous) - - """ - return s.split(sep, maxsplit) -splitfields = split - -# Join fields with optional separator -def join(words, sep = ' '): - """join(list [,sep]) -> string - - Return a string composed of the words in list, with - intervening occurrences of sep. The default separator is a - single space. - - (joinfields and join are synonymous) - - """ - return sep.join(words) -joinfields = join - -# for a little bit of speed -_apply = apply - -# Find substring, raise exception if not found -def index(s, *args): - """index(s, sub [,start [,end]]) -> int - - Like find but raises ValueError when the substring is not found. - - """ - return _apply(s.index, args) - -# Find last substring, raise exception if not found -def rindex(s, *args): - """rindex(s, sub [,start [,end]]) -> int - - Like rfind but raises ValueError when the substring is not found. - - """ - return _apply(s.rindex, args) - -# Count non-overlapping occurrences of substring -def count(s, *args): - """count(s, sub[, start[,end]]) -> int - - Return the number of occurrences of substring sub in string - s[start:end]. Optional arguments start and end are - interpreted as in slice notation. 
- - """ - return _apply(s.count, args) - -# Find substring, return -1 if not found -def find(s, *args): - """find(s, sub [,start [,end]]) -> in - - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - return _apply(s.find, args) - -# Find last substring, return -1 if not found -def rfind(s, *args): - """rfind(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - return _apply(s.rfind, args) - -# for a bit of speed -_float = float -_int = int -_long = long -_StringType = type('') - -# Convert string to float -def atof(s): - """atof(s) -> float - - Return the floating point number represented by the string s. - - """ - if type(s) == _StringType: - return _float(s) - else: - raise TypeError('argument 1: expected string, %s found' % - type(s).__name__) - -# Convert string to integer -def atoi(*args): - """atoi(s [,base]) -> int - - Return the integer represented by the string s in the given - base, which defaults to 10. The string s must consist of one - or more digits, possibly preceded by a sign. If base is 0, it - is chosen from the leading characters of s, 0 for octal, 0x or - 0X for hexadecimal. If base is 16, a preceding 0x or 0X is - accepted. - - """ - try: - s = args[0] - except IndexError: - raise TypeError('function requires at least 1 argument: %d given' % - len(args)) - # Don't catch type error resulting from too many arguments to int(). The - # error message isn't compatible but the error type is, and this function - # is complicated enough already. 
- if type(s) == _StringType: - return _apply(_int, args) - else: - raise TypeError('argument 1: expected string, %s found' % - type(s).__name__) - - -# Convert string to long integer -def atol(*args): - """atol(s [,base]) -> long - - Return the long integer represented by the string s in the - given base, which defaults to 10. The string s must consist - of one or more digits, possibly preceded by a sign. If base - is 0, it is chosen from the leading characters of s, 0 for - octal, 0x or 0X for hexadecimal. If base is 16, a preceding - 0x or 0X is accepted. A trailing L or l is not accepted, - unless base is 0. - - """ - try: - s = args[0] - except IndexError: - raise TypeError('function requires at least 1 argument: %d given' % - len(args)) - # Don't catch type error resulting from too many arguments to long(). The - # error message isn't compatible but the error type is, and this function - # is complicated enough already. - if type(s) == _StringType: - return _apply(_long, args) - else: - raise TypeError('argument 1: expected string, %s found' % - type(s).__name__) - - -# Left-justify a string -def ljust(s, width): - """ljust(s, width) -> string - - Return a left-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. - - """ - n = width - len(s) - if n <= 0: return s - return s + ' '*n - -# Right-justify a string -def rjust(s, width): - """rjust(s, width) -> string - - Return a right-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. - - """ - n = width - len(s) - if n <= 0: return s - return ' '*n + s - -# Center a string -def center(s, width): - """center(s, width) -> string - - Return a center version of s, in a field of the specified - width. padded with spaces as needed. The string is never - truncated. 
- - """ - n = width - len(s) - if n <= 0: return s - half = n/2 - if n%2 and width%2: - # This ensures that center(center(s, i), j) = center(s, j) - half = half+1 - return ' '*half + s + ' '*(n-half) - -# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' -# Decadent feature: the argument may be a string or a number -# (Use of this is deprecated; it should be a string as with ljust c.s.) -def zfill(x, width): - """zfill(x, width) -> string - - Pad a numeric string x with zeros on the left, to fill a field - of the specified width. The string x is never truncated. - - """ - if type(x) == type(''): s = x - else: s = repr(x) - n = len(s) - if n >= width: return s - sign = '' - if s[0] in ('-', '+'): - sign, s = s[0], s[1:] - return sign + '0'*(width-n) + s - -# Expand tabs in a string. -# Doesn't take non-printing chars into account, but does understand \n. -def expandtabs(s, tabsize=8): - """expandtabs(s [,tabsize]) -> string - - Return a copy of the string s with all tab characters replaced - by the appropriate number of spaces, depending on the current - column, and the tabsize (default 8). - - """ - res = line = '' - for c in s: - if c == '\t': - c = ' '*(tabsize - len(line) % tabsize) - line = line + c - if c == '\n': - res = res + line - line = '' - return res + line - -# Character translation through look-up table. -def translate(s, table, deletions=""): - """translate(s,table [,deletechars]) -> string - - Return a copy of the string s, where all characters occurring - in the optional argument deletechars are removed, and the - remaining characters have been mapped through the given - translation table, which must be a string of length 256. - - """ - return s.translate(table, deletions) - -# Capitalize a string, e.g. "aBc dEf" -> "Abc def". -def capitalize(s): - """capitalize(s) -> string - - Return a copy of the string s with only its first character - capitalized. - - """ - return s.capitalize() - -# Capitalize the words in a string, e.g. 
" aBc dEf " -> "Abc Def". -def capwords(s, sep=None): - """capwords(s, [sep]) -> string - - Split the argument into words using split, capitalize each - word using capitalize, and join the capitalized words using - join. Note that this replaces runs of whitespace characters by - a single space. - - """ - return join(map(capitalize, s.split(sep)), sep or ' ') - -# Construct a translation string -_idmapL = None -def maketrans(fromstr, tostr): - """maketrans(frm, to) -> string - - Return a translation table (a string of 256 bytes long) - suitable for use in string.translate. The strings frm and to - must be of the same length. - - """ - if len(fromstr) != len(tostr): - raise ValueError, "maketrans arguments must have same length" - global _idmapL - if not _idmapL: - _idmapL = list(_idmap) - L = _idmapL[:] - fromstr = map(ord, fromstr) - for i in range(len(fromstr)): - L[fromstr[i]] = tostr[i] - return join(L, "") - -# Substring replacement (global) -def replace(s, old, new, maxsplit=0): - """replace (str, old, new[, maxsplit]) -> string - - Return a copy of string str with all occurrences of substring - old replaced by new. If the optional argument maxsplit is - given, only the first maxsplit occurrences are replaced. - - """ - return s.replace(old, new, maxsplit) - - -# XXX: transitional -# -# If string objects do not have methods, then we need to use the old string.py -# library, which uses strop for many more things than just the few outlined -# below. -try: - ''.upper -except AttributeError: - from stringold import * - -# Try importing optional built-in module "strop" -- if it exists, -# it redefines some string operations that are 100-1000 times faster. -# It also defines values for whitespace, lowercase and uppercase -# that match 's definitions. 
- -try: - from strop import maketrans, lowercase, uppercase, whitespace - letters = lowercase + uppercase -except ImportError: - pass # Use the original versions diff --git a/python/Lib/stringprep.py b/python/Lib/stringprep.py deleted file mode 100755 index 1d49dd1ddd..0000000000 --- a/python/Lib/stringprep.py +++ /dev/null @@ -1,272 +0,0 @@ -# This file is generated by mkstringprep.py. DO NOT EDIT. -"""Library that exposes various tables found in the StringPrep RFC 3454. - -There are two kinds of tables: sets, for which a member test is provided, -and mappings, for which a mapping function is provided. -""" - -from unicodedata import ucd_3_2_0 as unicodedata - -assert unicodedata.unidata_version == '3.2.0' - -def in_table_a1(code): - if unicodedata.category(code) != 'Cn': return False - c = ord(code) - if 0xFDD0 <= c < 0xFDF0: return False - return (c & 0xFFFF) not in (0xFFFE, 0xFFFF) - - -b1_set = set([173, 847, 6150, 6155, 6156, 6157, 8203, 8204, 8205, 8288, 65279] + range(65024,65040)) -def in_table_b1(code): - return ord(code) in b1_set - - -b3_exceptions = { -0xb5:u'\u03bc', 0xdf:u'ss', 0x130:u'i\u0307', 0x149:u'\u02bcn', -0x17f:u's', 0x1f0:u'j\u030c', 0x345:u'\u03b9', 0x37a:u' \u03b9', -0x390:u'\u03b9\u0308\u0301', 0x3b0:u'\u03c5\u0308\u0301', 0x3c2:u'\u03c3', 0x3d0:u'\u03b2', -0x3d1:u'\u03b8', 0x3d2:u'\u03c5', 0x3d3:u'\u03cd', 0x3d4:u'\u03cb', -0x3d5:u'\u03c6', 0x3d6:u'\u03c0', 0x3f0:u'\u03ba', 0x3f1:u'\u03c1', -0x3f2:u'\u03c3', 0x3f5:u'\u03b5', 0x587:u'\u0565\u0582', 0x1e96:u'h\u0331', -0x1e97:u't\u0308', 0x1e98:u'w\u030a', 0x1e99:u'y\u030a', 0x1e9a:u'a\u02be', -0x1e9b:u'\u1e61', 0x1f50:u'\u03c5\u0313', 0x1f52:u'\u03c5\u0313\u0300', 0x1f54:u'\u03c5\u0313\u0301', -0x1f56:u'\u03c5\u0313\u0342', 0x1f80:u'\u1f00\u03b9', 0x1f81:u'\u1f01\u03b9', 0x1f82:u'\u1f02\u03b9', -0x1f83:u'\u1f03\u03b9', 0x1f84:u'\u1f04\u03b9', 0x1f85:u'\u1f05\u03b9', 0x1f86:u'\u1f06\u03b9', -0x1f87:u'\u1f07\u03b9', 0x1f88:u'\u1f00\u03b9', 0x1f89:u'\u1f01\u03b9', 0x1f8a:u'\u1f02\u03b9', 
-0x1f8b:u'\u1f03\u03b9', 0x1f8c:u'\u1f04\u03b9', 0x1f8d:u'\u1f05\u03b9', 0x1f8e:u'\u1f06\u03b9', -0x1f8f:u'\u1f07\u03b9', 0x1f90:u'\u1f20\u03b9', 0x1f91:u'\u1f21\u03b9', 0x1f92:u'\u1f22\u03b9', -0x1f93:u'\u1f23\u03b9', 0x1f94:u'\u1f24\u03b9', 0x1f95:u'\u1f25\u03b9', 0x1f96:u'\u1f26\u03b9', -0x1f97:u'\u1f27\u03b9', 0x1f98:u'\u1f20\u03b9', 0x1f99:u'\u1f21\u03b9', 0x1f9a:u'\u1f22\u03b9', -0x1f9b:u'\u1f23\u03b9', 0x1f9c:u'\u1f24\u03b9', 0x1f9d:u'\u1f25\u03b9', 0x1f9e:u'\u1f26\u03b9', -0x1f9f:u'\u1f27\u03b9', 0x1fa0:u'\u1f60\u03b9', 0x1fa1:u'\u1f61\u03b9', 0x1fa2:u'\u1f62\u03b9', -0x1fa3:u'\u1f63\u03b9', 0x1fa4:u'\u1f64\u03b9', 0x1fa5:u'\u1f65\u03b9', 0x1fa6:u'\u1f66\u03b9', -0x1fa7:u'\u1f67\u03b9', 0x1fa8:u'\u1f60\u03b9', 0x1fa9:u'\u1f61\u03b9', 0x1faa:u'\u1f62\u03b9', -0x1fab:u'\u1f63\u03b9', 0x1fac:u'\u1f64\u03b9', 0x1fad:u'\u1f65\u03b9', 0x1fae:u'\u1f66\u03b9', -0x1faf:u'\u1f67\u03b9', 0x1fb2:u'\u1f70\u03b9', 0x1fb3:u'\u03b1\u03b9', 0x1fb4:u'\u03ac\u03b9', -0x1fb6:u'\u03b1\u0342', 0x1fb7:u'\u03b1\u0342\u03b9', 0x1fbc:u'\u03b1\u03b9', 0x1fbe:u'\u03b9', -0x1fc2:u'\u1f74\u03b9', 0x1fc3:u'\u03b7\u03b9', 0x1fc4:u'\u03ae\u03b9', 0x1fc6:u'\u03b7\u0342', -0x1fc7:u'\u03b7\u0342\u03b9', 0x1fcc:u'\u03b7\u03b9', 0x1fd2:u'\u03b9\u0308\u0300', 0x1fd3:u'\u03b9\u0308\u0301', -0x1fd6:u'\u03b9\u0342', 0x1fd7:u'\u03b9\u0308\u0342', 0x1fe2:u'\u03c5\u0308\u0300', 0x1fe3:u'\u03c5\u0308\u0301', -0x1fe4:u'\u03c1\u0313', 0x1fe6:u'\u03c5\u0342', 0x1fe7:u'\u03c5\u0308\u0342', 0x1ff2:u'\u1f7c\u03b9', -0x1ff3:u'\u03c9\u03b9', 0x1ff4:u'\u03ce\u03b9', 0x1ff6:u'\u03c9\u0342', 0x1ff7:u'\u03c9\u0342\u03b9', -0x1ffc:u'\u03c9\u03b9', 0x20a8:u'rs', 0x2102:u'c', 0x2103:u'\xb0c', -0x2107:u'\u025b', 0x2109:u'\xb0f', 0x210b:u'h', 0x210c:u'h', -0x210d:u'h', 0x2110:u'i', 0x2111:u'i', 0x2112:u'l', -0x2115:u'n', 0x2116:u'no', 0x2119:u'p', 0x211a:u'q', -0x211b:u'r', 0x211c:u'r', 0x211d:u'r', 0x2120:u'sm', -0x2121:u'tel', 0x2122:u'tm', 0x2124:u'z', 0x2128:u'z', -0x212c:u'b', 0x212d:u'c', 0x2130:u'e', 
0x2131:u'f', -0x2133:u'm', 0x213e:u'\u03b3', 0x213f:u'\u03c0', 0x2145:u'd', -0x3371:u'hpa', 0x3373:u'au', 0x3375:u'ov', 0x3380:u'pa', -0x3381:u'na', 0x3382:u'\u03bca', 0x3383:u'ma', 0x3384:u'ka', -0x3385:u'kb', 0x3386:u'mb', 0x3387:u'gb', 0x338a:u'pf', -0x338b:u'nf', 0x338c:u'\u03bcf', 0x3390:u'hz', 0x3391:u'khz', -0x3392:u'mhz', 0x3393:u'ghz', 0x3394:u'thz', 0x33a9:u'pa', -0x33aa:u'kpa', 0x33ab:u'mpa', 0x33ac:u'gpa', 0x33b4:u'pv', -0x33b5:u'nv', 0x33b6:u'\u03bcv', 0x33b7:u'mv', 0x33b8:u'kv', -0x33b9:u'mv', 0x33ba:u'pw', 0x33bb:u'nw', 0x33bc:u'\u03bcw', -0x33bd:u'mw', 0x33be:u'kw', 0x33bf:u'mw', 0x33c0:u'k\u03c9', -0x33c1:u'm\u03c9', 0x33c3:u'bq', 0x33c6:u'c\u2215kg', 0x33c7:u'co.', -0x33c8:u'db', 0x33c9:u'gy', 0x33cb:u'hp', 0x33cd:u'kk', -0x33ce:u'km', 0x33d7:u'ph', 0x33d9:u'ppm', 0x33da:u'pr', -0x33dc:u'sv', 0x33dd:u'wb', 0xfb00:u'ff', 0xfb01:u'fi', -0xfb02:u'fl', 0xfb03:u'ffi', 0xfb04:u'ffl', 0xfb05:u'st', -0xfb06:u'st', 0xfb13:u'\u0574\u0576', 0xfb14:u'\u0574\u0565', 0xfb15:u'\u0574\u056b', -0xfb16:u'\u057e\u0576', 0xfb17:u'\u0574\u056d', 0x1d400:u'a', 0x1d401:u'b', -0x1d402:u'c', 0x1d403:u'd', 0x1d404:u'e', 0x1d405:u'f', -0x1d406:u'g', 0x1d407:u'h', 0x1d408:u'i', 0x1d409:u'j', -0x1d40a:u'k', 0x1d40b:u'l', 0x1d40c:u'm', 0x1d40d:u'n', -0x1d40e:u'o', 0x1d40f:u'p', 0x1d410:u'q', 0x1d411:u'r', -0x1d412:u's', 0x1d413:u't', 0x1d414:u'u', 0x1d415:u'v', -0x1d416:u'w', 0x1d417:u'x', 0x1d418:u'y', 0x1d419:u'z', -0x1d434:u'a', 0x1d435:u'b', 0x1d436:u'c', 0x1d437:u'd', -0x1d438:u'e', 0x1d439:u'f', 0x1d43a:u'g', 0x1d43b:u'h', -0x1d43c:u'i', 0x1d43d:u'j', 0x1d43e:u'k', 0x1d43f:u'l', -0x1d440:u'm', 0x1d441:u'n', 0x1d442:u'o', 0x1d443:u'p', -0x1d444:u'q', 0x1d445:u'r', 0x1d446:u's', 0x1d447:u't', -0x1d448:u'u', 0x1d449:u'v', 0x1d44a:u'w', 0x1d44b:u'x', -0x1d44c:u'y', 0x1d44d:u'z', 0x1d468:u'a', 0x1d469:u'b', -0x1d46a:u'c', 0x1d46b:u'd', 0x1d46c:u'e', 0x1d46d:u'f', -0x1d46e:u'g', 0x1d46f:u'h', 0x1d470:u'i', 0x1d471:u'j', -0x1d472:u'k', 0x1d473:u'l', 0x1d474:u'm', 0x1d475:u'n', 
-0x1d476:u'o', 0x1d477:u'p', 0x1d478:u'q', 0x1d479:u'r', -0x1d47a:u's', 0x1d47b:u't', 0x1d47c:u'u', 0x1d47d:u'v', -0x1d47e:u'w', 0x1d47f:u'x', 0x1d480:u'y', 0x1d481:u'z', -0x1d49c:u'a', 0x1d49e:u'c', 0x1d49f:u'd', 0x1d4a2:u'g', -0x1d4a5:u'j', 0x1d4a6:u'k', 0x1d4a9:u'n', 0x1d4aa:u'o', -0x1d4ab:u'p', 0x1d4ac:u'q', 0x1d4ae:u's', 0x1d4af:u't', -0x1d4b0:u'u', 0x1d4b1:u'v', 0x1d4b2:u'w', 0x1d4b3:u'x', -0x1d4b4:u'y', 0x1d4b5:u'z', 0x1d4d0:u'a', 0x1d4d1:u'b', -0x1d4d2:u'c', 0x1d4d3:u'd', 0x1d4d4:u'e', 0x1d4d5:u'f', -0x1d4d6:u'g', 0x1d4d7:u'h', 0x1d4d8:u'i', 0x1d4d9:u'j', -0x1d4da:u'k', 0x1d4db:u'l', 0x1d4dc:u'm', 0x1d4dd:u'n', -0x1d4de:u'o', 0x1d4df:u'p', 0x1d4e0:u'q', 0x1d4e1:u'r', -0x1d4e2:u's', 0x1d4e3:u't', 0x1d4e4:u'u', 0x1d4e5:u'v', -0x1d4e6:u'w', 0x1d4e7:u'x', 0x1d4e8:u'y', 0x1d4e9:u'z', -0x1d504:u'a', 0x1d505:u'b', 0x1d507:u'd', 0x1d508:u'e', -0x1d509:u'f', 0x1d50a:u'g', 0x1d50d:u'j', 0x1d50e:u'k', -0x1d50f:u'l', 0x1d510:u'm', 0x1d511:u'n', 0x1d512:u'o', -0x1d513:u'p', 0x1d514:u'q', 0x1d516:u's', 0x1d517:u't', -0x1d518:u'u', 0x1d519:u'v', 0x1d51a:u'w', 0x1d51b:u'x', -0x1d51c:u'y', 0x1d538:u'a', 0x1d539:u'b', 0x1d53b:u'd', -0x1d53c:u'e', 0x1d53d:u'f', 0x1d53e:u'g', 0x1d540:u'i', -0x1d541:u'j', 0x1d542:u'k', 0x1d543:u'l', 0x1d544:u'm', -0x1d546:u'o', 0x1d54a:u's', 0x1d54b:u't', 0x1d54c:u'u', -0x1d54d:u'v', 0x1d54e:u'w', 0x1d54f:u'x', 0x1d550:u'y', -0x1d56c:u'a', 0x1d56d:u'b', 0x1d56e:u'c', 0x1d56f:u'd', -0x1d570:u'e', 0x1d571:u'f', 0x1d572:u'g', 0x1d573:u'h', -0x1d574:u'i', 0x1d575:u'j', 0x1d576:u'k', 0x1d577:u'l', -0x1d578:u'm', 0x1d579:u'n', 0x1d57a:u'o', 0x1d57b:u'p', -0x1d57c:u'q', 0x1d57d:u'r', 0x1d57e:u's', 0x1d57f:u't', -0x1d580:u'u', 0x1d581:u'v', 0x1d582:u'w', 0x1d583:u'x', -0x1d584:u'y', 0x1d585:u'z', 0x1d5a0:u'a', 0x1d5a1:u'b', -0x1d5a2:u'c', 0x1d5a3:u'd', 0x1d5a4:u'e', 0x1d5a5:u'f', -0x1d5a6:u'g', 0x1d5a7:u'h', 0x1d5a8:u'i', 0x1d5a9:u'j', -0x1d5aa:u'k', 0x1d5ab:u'l', 0x1d5ac:u'm', 0x1d5ad:u'n', -0x1d5ae:u'o', 0x1d5af:u'p', 0x1d5b0:u'q', 0x1d5b1:u'r', 
-0x1d5b2:u's', 0x1d5b3:u't', 0x1d5b4:u'u', 0x1d5b5:u'v', -0x1d5b6:u'w', 0x1d5b7:u'x', 0x1d5b8:u'y', 0x1d5b9:u'z', -0x1d5d4:u'a', 0x1d5d5:u'b', 0x1d5d6:u'c', 0x1d5d7:u'd', -0x1d5d8:u'e', 0x1d5d9:u'f', 0x1d5da:u'g', 0x1d5db:u'h', -0x1d5dc:u'i', 0x1d5dd:u'j', 0x1d5de:u'k', 0x1d5df:u'l', -0x1d5e0:u'm', 0x1d5e1:u'n', 0x1d5e2:u'o', 0x1d5e3:u'p', -0x1d5e4:u'q', 0x1d5e5:u'r', 0x1d5e6:u's', 0x1d5e7:u't', -0x1d5e8:u'u', 0x1d5e9:u'v', 0x1d5ea:u'w', 0x1d5eb:u'x', -0x1d5ec:u'y', 0x1d5ed:u'z', 0x1d608:u'a', 0x1d609:u'b', -0x1d60a:u'c', 0x1d60b:u'd', 0x1d60c:u'e', 0x1d60d:u'f', -0x1d60e:u'g', 0x1d60f:u'h', 0x1d610:u'i', 0x1d611:u'j', -0x1d612:u'k', 0x1d613:u'l', 0x1d614:u'm', 0x1d615:u'n', -0x1d616:u'o', 0x1d617:u'p', 0x1d618:u'q', 0x1d619:u'r', -0x1d61a:u's', 0x1d61b:u't', 0x1d61c:u'u', 0x1d61d:u'v', -0x1d61e:u'w', 0x1d61f:u'x', 0x1d620:u'y', 0x1d621:u'z', -0x1d63c:u'a', 0x1d63d:u'b', 0x1d63e:u'c', 0x1d63f:u'd', -0x1d640:u'e', 0x1d641:u'f', 0x1d642:u'g', 0x1d643:u'h', -0x1d644:u'i', 0x1d645:u'j', 0x1d646:u'k', 0x1d647:u'l', -0x1d648:u'm', 0x1d649:u'n', 0x1d64a:u'o', 0x1d64b:u'p', -0x1d64c:u'q', 0x1d64d:u'r', 0x1d64e:u's', 0x1d64f:u't', -0x1d650:u'u', 0x1d651:u'v', 0x1d652:u'w', 0x1d653:u'x', -0x1d654:u'y', 0x1d655:u'z', 0x1d670:u'a', 0x1d671:u'b', -0x1d672:u'c', 0x1d673:u'd', 0x1d674:u'e', 0x1d675:u'f', -0x1d676:u'g', 0x1d677:u'h', 0x1d678:u'i', 0x1d679:u'j', -0x1d67a:u'k', 0x1d67b:u'l', 0x1d67c:u'm', 0x1d67d:u'n', -0x1d67e:u'o', 0x1d67f:u'p', 0x1d680:u'q', 0x1d681:u'r', -0x1d682:u's', 0x1d683:u't', 0x1d684:u'u', 0x1d685:u'v', -0x1d686:u'w', 0x1d687:u'x', 0x1d688:u'y', 0x1d689:u'z', -0x1d6a8:u'\u03b1', 0x1d6a9:u'\u03b2', 0x1d6aa:u'\u03b3', 0x1d6ab:u'\u03b4', -0x1d6ac:u'\u03b5', 0x1d6ad:u'\u03b6', 0x1d6ae:u'\u03b7', 0x1d6af:u'\u03b8', -0x1d6b0:u'\u03b9', 0x1d6b1:u'\u03ba', 0x1d6b2:u'\u03bb', 0x1d6b3:u'\u03bc', -0x1d6b4:u'\u03bd', 0x1d6b5:u'\u03be', 0x1d6b6:u'\u03bf', 0x1d6b7:u'\u03c0', -0x1d6b8:u'\u03c1', 0x1d6b9:u'\u03b8', 0x1d6ba:u'\u03c3', 0x1d6bb:u'\u03c4', 
-0x1d6bc:u'\u03c5', 0x1d6bd:u'\u03c6', 0x1d6be:u'\u03c7', 0x1d6bf:u'\u03c8', -0x1d6c0:u'\u03c9', 0x1d6d3:u'\u03c3', 0x1d6e2:u'\u03b1', 0x1d6e3:u'\u03b2', -0x1d6e4:u'\u03b3', 0x1d6e5:u'\u03b4', 0x1d6e6:u'\u03b5', 0x1d6e7:u'\u03b6', -0x1d6e8:u'\u03b7', 0x1d6e9:u'\u03b8', 0x1d6ea:u'\u03b9', 0x1d6eb:u'\u03ba', -0x1d6ec:u'\u03bb', 0x1d6ed:u'\u03bc', 0x1d6ee:u'\u03bd', 0x1d6ef:u'\u03be', -0x1d6f0:u'\u03bf', 0x1d6f1:u'\u03c0', 0x1d6f2:u'\u03c1', 0x1d6f3:u'\u03b8', -0x1d6f4:u'\u03c3', 0x1d6f5:u'\u03c4', 0x1d6f6:u'\u03c5', 0x1d6f7:u'\u03c6', -0x1d6f8:u'\u03c7', 0x1d6f9:u'\u03c8', 0x1d6fa:u'\u03c9', 0x1d70d:u'\u03c3', -0x1d71c:u'\u03b1', 0x1d71d:u'\u03b2', 0x1d71e:u'\u03b3', 0x1d71f:u'\u03b4', -0x1d720:u'\u03b5', 0x1d721:u'\u03b6', 0x1d722:u'\u03b7', 0x1d723:u'\u03b8', -0x1d724:u'\u03b9', 0x1d725:u'\u03ba', 0x1d726:u'\u03bb', 0x1d727:u'\u03bc', -0x1d728:u'\u03bd', 0x1d729:u'\u03be', 0x1d72a:u'\u03bf', 0x1d72b:u'\u03c0', -0x1d72c:u'\u03c1', 0x1d72d:u'\u03b8', 0x1d72e:u'\u03c3', 0x1d72f:u'\u03c4', -0x1d730:u'\u03c5', 0x1d731:u'\u03c6', 0x1d732:u'\u03c7', 0x1d733:u'\u03c8', -0x1d734:u'\u03c9', 0x1d747:u'\u03c3', 0x1d756:u'\u03b1', 0x1d757:u'\u03b2', -0x1d758:u'\u03b3', 0x1d759:u'\u03b4', 0x1d75a:u'\u03b5', 0x1d75b:u'\u03b6', -0x1d75c:u'\u03b7', 0x1d75d:u'\u03b8', 0x1d75e:u'\u03b9', 0x1d75f:u'\u03ba', -0x1d760:u'\u03bb', 0x1d761:u'\u03bc', 0x1d762:u'\u03bd', 0x1d763:u'\u03be', -0x1d764:u'\u03bf', 0x1d765:u'\u03c0', 0x1d766:u'\u03c1', 0x1d767:u'\u03b8', -0x1d768:u'\u03c3', 0x1d769:u'\u03c4', 0x1d76a:u'\u03c5', 0x1d76b:u'\u03c6', -0x1d76c:u'\u03c7', 0x1d76d:u'\u03c8', 0x1d76e:u'\u03c9', 0x1d781:u'\u03c3', -0x1d790:u'\u03b1', 0x1d791:u'\u03b2', 0x1d792:u'\u03b3', 0x1d793:u'\u03b4', -0x1d794:u'\u03b5', 0x1d795:u'\u03b6', 0x1d796:u'\u03b7', 0x1d797:u'\u03b8', -0x1d798:u'\u03b9', 0x1d799:u'\u03ba', 0x1d79a:u'\u03bb', 0x1d79b:u'\u03bc', -0x1d79c:u'\u03bd', 0x1d79d:u'\u03be', 0x1d79e:u'\u03bf', 0x1d79f:u'\u03c0', -0x1d7a0:u'\u03c1', 0x1d7a1:u'\u03b8', 0x1d7a2:u'\u03c3', 
0x1d7a3:u'\u03c4', -0x1d7a4:u'\u03c5', 0x1d7a5:u'\u03c6', 0x1d7a6:u'\u03c7', 0x1d7a7:u'\u03c8', -0x1d7a8:u'\u03c9', 0x1d7bb:u'\u03c3', } - -def map_table_b3(code): - r = b3_exceptions.get(ord(code)) - if r is not None: return r - return code.lower() - - -def map_table_b2(a): - al = map_table_b3(a) - b = unicodedata.normalize("NFKC", al) - bl = u"".join([map_table_b3(ch) for ch in b]) - c = unicodedata.normalize("NFKC", bl) - if b != c: - return c - else: - return al - - -def in_table_c11(code): - return code == u" " - - -def in_table_c12(code): - return unicodedata.category(code) == "Zs" and code != u" " - -def in_table_c11_c12(code): - return unicodedata.category(code) == "Zs" - - -def in_table_c21(code): - return ord(code) < 128 and unicodedata.category(code) == "Cc" - -c22_specials = set([1757, 1807, 6158, 8204, 8205, 8232, 8233, 65279] + range(8288,8292) + range(8298,8304) + range(65529,65533) + range(119155,119163)) -def in_table_c22(code): - c = ord(code) - if c < 128: return False - if unicodedata.category(code) == "Cc": return True - return c in c22_specials - -def in_table_c21_c22(code): - return unicodedata.category(code) == "Cc" or \ - ord(code) in c22_specials - - -def in_table_c3(code): - return unicodedata.category(code) == "Co" - - -def in_table_c4(code): - c = ord(code) - if c < 0xFDD0: return False - if c < 0xFDF0: return True - return (ord(code) & 0xFFFF) in (0xFFFE, 0xFFFF) - - -def in_table_c5(code): - return unicodedata.category(code) == "Cs" - - -c6_set = set(range(65529,65534)) -def in_table_c6(code): - return ord(code) in c6_set - - -c7_set = set(range(12272,12284)) -def in_table_c7(code): - return ord(code) in c7_set - - -c8_set = set([832, 833, 8206, 8207] + range(8234,8239) + range(8298,8304)) -def in_table_c8(code): - return ord(code) in c8_set - - -c9_set = set([917505] + range(917536,917632)) -def in_table_c9(code): - return ord(code) in c9_set - - -def in_table_d1(code): - return unicodedata.bidirectional(code) in ("R","AL") - - -def 
in_table_d2(code): - return unicodedata.bidirectional(code) == "L" diff --git a/python/Lib/struct.py b/python/Lib/struct.py deleted file mode 100755 index b022355c18..0000000000 --- a/python/Lib/struct.py +++ /dev/null @@ -1,3 +0,0 @@ -from _struct import * -from _struct import _clearcache -from _struct import __doc__ diff --git a/python/Lib/subprocess.py b/python/Lib/subprocess.py deleted file mode 100755 index 0dfce35a64..0000000000 --- a/python/Lib/subprocess.py +++ /dev/null @@ -1,1318 +0,0 @@ -# subprocess - Subprocesses with accessible I/O streams -# -# For more information about this module, see PEP 324. -# -# Copyright (c) 2003-2005 by Peter Astrand -# -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/2.4/license for licensing details. - -r"""Subprocesses with accessible I/O streams - -This module allows you to spawn processes, connect to their -input/output/error pipes, and obtain their return codes. - -For a complete description of this module see the Python documentation. - -Main API -======== -call(...): Runs a command, waits for it to complete, then returns - the return code. -check_call(...): Same as call() but raises CalledProcessError() - if return code is not 0 -check_output(...): Same as check_call() but returns the contents of - stdout instead of a return code -Popen(...): A class for flexibly executing a command in a new process - -Constants ---------- -PIPE: Special value that indicates a pipe should be created -STDOUT: Special value that indicates that stderr should go to stdout -""" - -import sys -mswindows = (sys.platform == "win32") - -import os -import types -import traceback -import gc -import signal -import errno - -# Exception classes used by this module. -class CalledProcessError(Exception): - """This exception is raised when a process run by check_call() or - check_output() returns a non-zero exit status. 
- - Attributes: - cmd, returncode, output - """ - def __init__(self, returncode, cmd, output=None): - self.returncode = returncode - self.cmd = cmd - self.output = output - def __str__(self): - return "Command '%s' returned non-zero exit status %d" % (self.cmd, self.returncode) - - -if mswindows: - import threading - import msvcrt - import _subprocess - class STARTUPINFO: - dwFlags = 0 - hStdInput = None - hStdOutput = None - hStdError = None - wShowWindow = 0 - class pywintypes: - error = IOError -else: - import select - _has_poll = hasattr(select, 'poll') - import fcntl - import pickle - - # When select or poll has indicated that the file is writable, - # we can write up to _PIPE_BUF bytes without risk of blocking. - # POSIX defines PIPE_BUF as >= 512. - _PIPE_BUF = getattr(select, 'PIPE_BUF', 512) - - -__all__ = ["Popen", "PIPE", "STDOUT", "call", "check_call", - "check_output", "CalledProcessError"] - -if mswindows: - from _subprocess import (CREATE_NEW_CONSOLE, CREATE_NEW_PROCESS_GROUP, - STD_INPUT_HANDLE, STD_OUTPUT_HANDLE, - STD_ERROR_HANDLE, SW_HIDE, - STARTF_USESTDHANDLES, STARTF_USESHOWWINDOW) - - __all__.extend(["CREATE_NEW_CONSOLE", "CREATE_NEW_PROCESS_GROUP", - "STD_INPUT_HANDLE", "STD_OUTPUT_HANDLE", - "STD_ERROR_HANDLE", "SW_HIDE", - "STARTF_USESTDHANDLES", "STARTF_USESHOWWINDOW"]) -try: - MAXFD = os.sysconf("SC_OPEN_MAX") -except: - MAXFD = 256 - -_active = [] - -def _cleanup(): - for inst in _active[:]: - res = inst._internal_poll(_deadstate=sys.maxint) - if res is not None: - try: - _active.remove(inst) - except ValueError: - # This can happen if two threads create a new Popen instance. - # It's harmless that it was already removed, so ignore. 
- pass - -PIPE = -1 -STDOUT = -2 - - -def _eintr_retry_call(func, *args): - while True: - try: - return func(*args) - except (OSError, IOError) as e: - if e.errno == errno.EINTR: - continue - raise - - -# XXX This function is only used by multiprocessing and the test suite, -# but it's here so that it can be imported when Python is compiled without -# threads. - -def _args_from_interpreter_flags(): - """Return a list of command-line arguments reproducing the current - settings in sys.flags and sys.warnoptions.""" - flag_opt_map = { - 'debug': 'd', - # 'inspect': 'i', - # 'interactive': 'i', - 'optimize': 'O', - 'dont_write_bytecode': 'B', - 'no_user_site': 's', - 'no_site': 'S', - 'ignore_environment': 'E', - 'verbose': 'v', - 'bytes_warning': 'b', - 'py3k_warning': '3', - } - args = [] - for flag, opt in flag_opt_map.items(): - v = getattr(sys.flags, flag) - if v > 0: - args.append('-' + opt * v) - if getattr(sys.flags, 'hash_randomization') != 0: - args.append('-R') - for opt in sys.warnoptions: - args.append('-W' + opt) - return args - - -def call(*popenargs, **kwargs): - """Run command with arguments. Wait for command to complete, then - return the returncode attribute. - - The arguments are the same as for the Popen constructor. Example: - - retcode = call(["ls", "-l"]) - """ - return Popen(*popenargs, **kwargs).wait() - - -def check_call(*popenargs, **kwargs): - """Run command with arguments. Wait for command to complete. If - the exit code was zero then return, otherwise raise - CalledProcessError. The CalledProcessError object will have the - return code in the returncode attribute. - - The arguments are the same as for the Popen constructor. 
Example: - - check_call(["ls", "-l"]) - """ - retcode = call(*popenargs, **kwargs) - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - raise CalledProcessError(retcode, cmd) - return 0 - - -def check_output(*popenargs, **kwargs): - r"""Run command with arguments and return its output as a byte string. - - If the exit code was non-zero it raises a CalledProcessError. The - CalledProcessError object will have the return code in the returncode - attribute and output in the output attribute. - - The arguments are the same as for the Popen constructor. Example: - - >>> check_output(["ls", "-l", "/dev/null"]) - 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' - - The stdout argument is not allowed as it is used internally. - To capture standard error in the result, use stderr=STDOUT. - - >>> check_output(["/bin/sh", "-c", - ... "ls -l non_existent_file ; exit 0"], - ... stderr=STDOUT) - 'ls: non_existent_file: No such file or directory\n' - """ - if 'stdout' in kwargs: - raise ValueError('stdout argument not allowed, it will be overridden.') - process = Popen(stdout=PIPE, *popenargs, **kwargs) - output, unused_err = process.communicate() - retcode = process.poll() - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - raise CalledProcessError(retcode, cmd, output=output) - return output - - -def list2cmdline(seq): - """ - Translate a sequence of arguments into a command line - string, using the same rules as the MS C runtime: - - 1) Arguments are delimited by white space, which is either a - space or a tab. - - 2) A string surrounded by double quotation marks is - interpreted as a single argument, regardless of white space - contained within. A quoted string can be embedded in an - argument. - - 3) A double quotation mark preceded by a backslash is - interpreted as a literal double quotation mark. - - 4) Backslashes are interpreted literally, unless they - immediately precede a double quotation mark. 
- - 5) If backslashes immediately precede a double quotation mark, - every pair of backslashes is interpreted as a literal - backslash. If the number of backslashes is odd, the last - backslash escapes the next double quotation mark as - described in rule 3. - """ - - # See - # http://msdn.microsoft.com/en-us/library/17w5ykft.aspx - # or search http://msdn.microsoft.com for - # "Parsing C++ Command-Line Arguments" - result = [] - needquote = False - for arg in seq: - bs_buf = [] - - # Add a space to separate this argument from the others - if result: - result.append(' ') - - needquote = (" " in arg) or ("\t" in arg) or not arg - if needquote: - result.append('"') - - for c in arg: - if c == '\\': - # Don't know if we need to double yet. - bs_buf.append(c) - elif c == '"': - # Double backslashes. - result.append('\\' * len(bs_buf)*2) - bs_buf = [] - result.append('\\"') - else: - # Normal char - if bs_buf: - result.extend(bs_buf) - bs_buf = [] - result.append(c) - - # Add remaining backslashes, if any. - if bs_buf: - result.extend(bs_buf) - - if needquote: - result.extend(bs_buf) - result.append('"') - - return ''.join(result) - - -class Popen(object): - """ Execute a child program in a new process. - - For a complete description of the arguments see the Python documentation. - - Arguments: - args: A string, or a sequence of program arguments. - - bufsize: supplied as the buffering argument to the open() function when - creating the stdin/stdout/stderr pipe file objects - - executable: A replacement program to execute. - - stdin, stdout and stderr: These specify the executed programs' standard - input, standard output and standard error file handles, respectively. - - preexec_fn: (POSIX only) An object to be called in the child process - just before the child is executed. - - close_fds: Controls closing or inheriting of file descriptors. - - shell: If true, the command will be executed through the shell. 
- - cwd: Sets the current directory before the child is executed. - - env: Defines the environment variables for the new process. - - universal_newlines: If true, use universal line endings for file - objects stdin, stdout and stderr. - - startupinfo and creationflags (Windows only) - - Attributes: - stdin, stdout, stderr, pid, returncode - """ - _child_created = False # Set here since __del__ checks it - - def __init__(self, args, bufsize=0, executable=None, - stdin=None, stdout=None, stderr=None, - preexec_fn=None, close_fds=False, shell=False, - cwd=None, env=None, universal_newlines=False, - startupinfo=None, creationflags=0): - """Create new Popen instance.""" - _cleanup() - - if not isinstance(bufsize, (int, long)): - raise TypeError("bufsize must be an integer") - - if mswindows: - if preexec_fn is not None: - raise ValueError("preexec_fn is not supported on Windows " - "platforms") - if close_fds and (stdin is not None or stdout is not None or - stderr is not None): - raise ValueError("close_fds is not supported on Windows " - "platforms if you redirect stdin/stdout/stderr") - else: - # POSIX - if startupinfo is not None: - raise ValueError("startupinfo is only supported on Windows " - "platforms") - if creationflags != 0: - raise ValueError("creationflags is only supported on Windows " - "platforms") - - self.stdin = None - self.stdout = None - self.stderr = None - self.pid = None - self.returncode = None - self.universal_newlines = universal_newlines - - # Input and output objects. The general principle is like - # this: - # - # Parent Child - # ------ ----- - # p2cwrite ---stdin---> p2cread - # c2pread <--stdout--- c2pwrite - # errread <--stderr--- errwrite - # - # On POSIX, the child objects are file descriptors. On - # Windows, these are Windows file handles. The parent objects - # are file descriptors on both platforms. The parent objects - # are None when not using PIPEs. The child objects are None - # when not redirecting. 
- - (p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite), to_close = self._get_handles(stdin, stdout, stderr) - - try: - self._execute_child(args, executable, preexec_fn, close_fds, - cwd, env, universal_newlines, - startupinfo, creationflags, shell, to_close, - p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite) - except Exception: - # Preserve original exception in case os.close raises. - exc_type, exc_value, exc_trace = sys.exc_info() - - for fd in to_close: - try: - if mswindows: - fd.Close() - else: - os.close(fd) - except EnvironmentError: - pass - - raise exc_type, exc_value, exc_trace - - if mswindows: - if p2cwrite is not None: - p2cwrite = msvcrt.open_osfhandle(p2cwrite.Detach(), 0) - if c2pread is not None: - c2pread = msvcrt.open_osfhandle(c2pread.Detach(), 0) - if errread is not None: - errread = msvcrt.open_osfhandle(errread.Detach(), 0) - - if p2cwrite is not None: - self.stdin = os.fdopen(p2cwrite, 'wb', bufsize) - if c2pread is not None: - if universal_newlines: - self.stdout = os.fdopen(c2pread, 'rU', bufsize) - else: - self.stdout = os.fdopen(c2pread, 'rb', bufsize) - if errread is not None: - if universal_newlines: - self.stderr = os.fdopen(errread, 'rU', bufsize) - else: - self.stderr = os.fdopen(errread, 'rb', bufsize) - - - def _translate_newlines(self, data): - data = data.replace("\r\n", "\n") - data = data.replace("\r", "\n") - return data - - - def __del__(self, _maxint=sys.maxint): - # If __init__ hasn't had a chance to execute (e.g. if it - # was passed an undeclared keyword argument), we don't - # have a _child_created attribute at all. - if not self._child_created: - # We didn't get to successfully create a child process. - return - # In case the child hasn't been waited on, check if it's done. - self._internal_poll(_deadstate=_maxint) - if self.returncode is None and _active is not None: - # Child is still running, keep us alive until we can wait on it. 
- _active.append(self) - - - def communicate(self, input=None): - """Interact with process: Send data to stdin. Read data from - stdout and stderr, until end-of-file is reached. Wait for - process to terminate. The optional input argument should be a - string to be sent to the child process, or None, if no data - should be sent to the child. - - communicate() returns a tuple (stdout, stderr).""" - - # Optimization: If we are only using one pipe, or no pipe at - # all, using select() or threads is unnecessary. - if [self.stdin, self.stdout, self.stderr].count(None) >= 2: - stdout = None - stderr = None - if self.stdin: - if input: - try: - self.stdin.write(input) - except IOError as e: - if e.errno != errno.EPIPE and e.errno != errno.EINVAL: - raise - self.stdin.close() - elif self.stdout: - stdout = _eintr_retry_call(self.stdout.read) - self.stdout.close() - elif self.stderr: - stderr = _eintr_retry_call(self.stderr.read) - self.stderr.close() - self.wait() - return (stdout, stderr) - - return self._communicate(input) - - - def poll(self): - """Check if child process has terminated. 
Set and return returncode - attribute.""" - return self._internal_poll() - - - if mswindows: - # - # Windows methods - # - def _get_handles(self, stdin, stdout, stderr): - """Construct and return tuple with IO objects: - p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite - """ - to_close = set() - if stdin is None and stdout is None and stderr is None: - return (None, None, None, None, None, None), to_close - - p2cread, p2cwrite = None, None - c2pread, c2pwrite = None, None - errread, errwrite = None, None - - if stdin is None: - p2cread = _subprocess.GetStdHandle(_subprocess.STD_INPUT_HANDLE) - if p2cread is None: - p2cread, _ = _subprocess.CreatePipe(None, 0) - elif stdin == PIPE: - p2cread, p2cwrite = _subprocess.CreatePipe(None, 0) - elif isinstance(stdin, int): - p2cread = msvcrt.get_osfhandle(stdin) - else: - # Assuming file-like object - p2cread = msvcrt.get_osfhandle(stdin.fileno()) - p2cread = self._make_inheritable(p2cread) - # We just duplicated the handle, it has to be closed at the end - to_close.add(p2cread) - if stdin == PIPE: - to_close.add(p2cwrite) - - if stdout is None: - c2pwrite = _subprocess.GetStdHandle(_subprocess.STD_OUTPUT_HANDLE) - if c2pwrite is None: - _, c2pwrite = _subprocess.CreatePipe(None, 0) - elif stdout == PIPE: - c2pread, c2pwrite = _subprocess.CreatePipe(None, 0) - elif isinstance(stdout, int): - c2pwrite = msvcrt.get_osfhandle(stdout) - else: - # Assuming file-like object - c2pwrite = msvcrt.get_osfhandle(stdout.fileno()) - c2pwrite = self._make_inheritable(c2pwrite) - # We just duplicated the handle, it has to be closed at the end - to_close.add(c2pwrite) - if stdout == PIPE: - to_close.add(c2pread) - - if stderr is None: - errwrite = _subprocess.GetStdHandle(_subprocess.STD_ERROR_HANDLE) - if errwrite is None: - _, errwrite = _subprocess.CreatePipe(None, 0) - elif stderr == PIPE: - errread, errwrite = _subprocess.CreatePipe(None, 0) - elif stderr == STDOUT: - errwrite = c2pwrite - elif isinstance(stderr, int): - 
errwrite = msvcrt.get_osfhandle(stderr) - else: - # Assuming file-like object - errwrite = msvcrt.get_osfhandle(stderr.fileno()) - errwrite = self._make_inheritable(errwrite) - # We just duplicated the handle, it has to be closed at the end - to_close.add(errwrite) - if stderr == PIPE: - to_close.add(errread) - - return (p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite), to_close - - - def _make_inheritable(self, handle): - """Return a duplicate of handle, which is inheritable""" - return _subprocess.DuplicateHandle(_subprocess.GetCurrentProcess(), - handle, _subprocess.GetCurrentProcess(), 0, 1, - _subprocess.DUPLICATE_SAME_ACCESS) - - - def _find_w9xpopen(self): - """Find and return absolut path to w9xpopen.exe""" - w9xpopen = os.path.join( - os.path.dirname(_subprocess.GetModuleFileName(0)), - "w9xpopen.exe") - if not os.path.exists(w9xpopen): - # Eeek - file-not-found - possibly an embedding - # situation - see if we can locate it in sys.exec_prefix - w9xpopen = os.path.join(os.path.dirname(sys.exec_prefix), - "w9xpopen.exe") - if not os.path.exists(w9xpopen): - raise RuntimeError("Cannot locate w9xpopen.exe, which is " - "needed for Popen to work with your " - "shell or platform.") - return w9xpopen - - - def _execute_child(self, args, executable, preexec_fn, close_fds, - cwd, env, universal_newlines, - startupinfo, creationflags, shell, to_close, - p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite): - """Execute program (MS Windows version)""" - - if not isinstance(args, types.StringTypes): - args = list2cmdline(args) - - # Process startup details - if startupinfo is None: - startupinfo = STARTUPINFO() - if None not in (p2cread, c2pwrite, errwrite): - startupinfo.dwFlags |= _subprocess.STARTF_USESTDHANDLES - startupinfo.hStdInput = p2cread - startupinfo.hStdOutput = c2pwrite - startupinfo.hStdError = errwrite - - if shell: - startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW - startupinfo.wShowWindow = _subprocess.SW_HIDE - comspec = 
os.environ.get("COMSPEC", "cmd.exe") - args = '{} /c "{}"'.format (comspec, args) - if (_subprocess.GetVersion() >= 0x80000000 or - os.path.basename(comspec).lower() == "command.com"): - # Win9x, or using command.com on NT. We need to - # use the w9xpopen intermediate program. For more - # information, see KB Q150956 - # (http://web.archive.org/web/20011105084002/http://support.microsoft.com/support/kb/articles/Q150/9/56.asp) - w9xpopen = self._find_w9xpopen() - args = '"%s" %s' % (w9xpopen, args) - # Not passing CREATE_NEW_CONSOLE has been known to - # cause random failures on win9x. Specifically a - # dialog: "Your program accessed mem currently in - # use at xxx" and a hopeful warning about the - # stability of your system. Cost is Ctrl+C wont - # kill children. - creationflags |= _subprocess.CREATE_NEW_CONSOLE - - def _close_in_parent(fd): - fd.Close() - to_close.remove(fd) - - # Start the process - try: - hp, ht, pid, tid = _subprocess.CreateProcess(executable, args, - # no special security - None, None, - int(not close_fds), - creationflags, - env, - cwd, - startupinfo) - except pywintypes.error, e: - # Translate pywintypes.error to WindowsError, which is - # a subclass of OSError. FIXME: We should really - # translate errno using _sys_errlist (or similar), but - # how can this be done from Python? - raise WindowsError(*e.args) - finally: - # Child is launched. Close the parent's copy of those pipe - # handles that only the child should have open. You need - # to make sure that no handles to the write end of the - # output pipe are maintained in this process or else the - # pipe will not close when the child process exits and the - # ReadFile will hang. 
- if p2cread is not None: - _close_in_parent(p2cread) - if c2pwrite is not None: - _close_in_parent(c2pwrite) - if errwrite is not None: - _close_in_parent(errwrite) - - # Retain the process handle, but close the thread handle - self._child_created = True - self._handle = hp - self.pid = pid - ht.Close() - - def _internal_poll(self, _deadstate=None, - _WaitForSingleObject=_subprocess.WaitForSingleObject, - _WAIT_OBJECT_0=_subprocess.WAIT_OBJECT_0, - _GetExitCodeProcess=_subprocess.GetExitCodeProcess): - """Check if child process has terminated. Returns returncode - attribute. - - This method is called by __del__, so it can only refer to objects - in its local scope. - - """ - if self.returncode is None: - if _WaitForSingleObject(self._handle, 0) == _WAIT_OBJECT_0: - self.returncode = _GetExitCodeProcess(self._handle) - return self.returncode - - - def wait(self): - """Wait for child process to terminate. Returns returncode - attribute.""" - if self.returncode is None: - _subprocess.WaitForSingleObject(self._handle, - _subprocess.INFINITE) - self.returncode = _subprocess.GetExitCodeProcess(self._handle) - return self.returncode - - - def _readerthread(self, fh, buffer): - buffer.append(fh.read()) - - - def _communicate(self, input): - stdout = None # Return - stderr = None # Return - - if self.stdout: - stdout = [] - stdout_thread = threading.Thread(target=self._readerthread, - args=(self.stdout, stdout)) - stdout_thread.setDaemon(True) - stdout_thread.start() - if self.stderr: - stderr = [] - stderr_thread = threading.Thread(target=self._readerthread, - args=(self.stderr, stderr)) - stderr_thread.setDaemon(True) - stderr_thread.start() - - if self.stdin: - if input is not None: - try: - self.stdin.write(input) - except IOError as e: - if e.errno == errno.EPIPE: - # communicate() should ignore broken pipe error - pass - elif (e.errno == errno.EINVAL - and self.poll() is not None): - # Issue #19612: stdin.write() fails with EINVAL - # if the process already exited 
before the write - pass - else: - raise - self.stdin.close() - - if self.stdout: - stdout_thread.join() - if self.stderr: - stderr_thread.join() - - # All data exchanged. Translate lists into strings. - if stdout is not None: - stdout = stdout[0] - if stderr is not None: - stderr = stderr[0] - - # Translate newlines, if requested. We cannot let the file - # object do the translation: It is based on stdio, which is - # impossible to combine with select (unless forcing no - # buffering). - if self.universal_newlines and hasattr(file, 'newlines'): - if stdout: - stdout = self._translate_newlines(stdout) - if stderr: - stderr = self._translate_newlines(stderr) - - self.wait() - return (stdout, stderr) - - def send_signal(self, sig): - """Send a signal to the process - """ - if sig == signal.SIGTERM: - self.terminate() - elif sig == signal.CTRL_C_EVENT: - os.kill(self.pid, signal.CTRL_C_EVENT) - elif sig == signal.CTRL_BREAK_EVENT: - os.kill(self.pid, signal.CTRL_BREAK_EVENT) - else: - raise ValueError("Unsupported signal: {}".format(sig)) - - def terminate(self): - """Terminates the process - """ - try: - _subprocess.TerminateProcess(self._handle, 1) - except OSError as e: - # ERROR_ACCESS_DENIED (winerror 5) is received when the - # process already died. 
- if e.winerror != 5: - raise - rc = _subprocess.GetExitCodeProcess(self._handle) - if rc == _subprocess.STILL_ACTIVE: - raise - self.returncode = rc - - kill = terminate - - else: - # - # POSIX methods - # - def _get_handles(self, stdin, stdout, stderr): - """Construct and return tuple with IO objects: - p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite - """ - to_close = set() - p2cread, p2cwrite = None, None - c2pread, c2pwrite = None, None - errread, errwrite = None, None - - if stdin is None: - pass - elif stdin == PIPE: - p2cread, p2cwrite = self.pipe_cloexec() - to_close.update((p2cread, p2cwrite)) - elif isinstance(stdin, int): - p2cread = stdin - else: - # Assuming file-like object - p2cread = stdin.fileno() - - if stdout is None: - pass - elif stdout == PIPE: - c2pread, c2pwrite = self.pipe_cloexec() - to_close.update((c2pread, c2pwrite)) - elif isinstance(stdout, int): - c2pwrite = stdout - else: - # Assuming file-like object - c2pwrite = stdout.fileno() - - if stderr is None: - pass - elif stderr == PIPE: - errread, errwrite = self.pipe_cloexec() - to_close.update((errread, errwrite)) - elif stderr == STDOUT: - if c2pwrite is not None: - errwrite = c2pwrite - else: # child's stdout is not set, use parent's stdout - errwrite = sys.__stdout__.fileno() - elif isinstance(stderr, int): - errwrite = stderr - else: - # Assuming file-like object - errwrite = stderr.fileno() - - return (p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite), to_close - - - def _set_cloexec_flag(self, fd, cloexec=True): - try: - cloexec_flag = fcntl.FD_CLOEXEC - except AttributeError: - cloexec_flag = 1 - - old = fcntl.fcntl(fd, fcntl.F_GETFD) - if cloexec: - fcntl.fcntl(fd, fcntl.F_SETFD, old | cloexec_flag) - else: - fcntl.fcntl(fd, fcntl.F_SETFD, old & ~cloexec_flag) - - - def pipe_cloexec(self): - """Create a pipe with FDs set CLOEXEC.""" - # Pipes' FDs are set CLOEXEC by default because we don't want them - # to be inherited by other subprocesses: the CLOEXEC 
flag is removed - # from the child's FDs by _dup2(), between fork() and exec(). - # This is not atomic: we would need the pipe2() syscall for that. - r, w = os.pipe() - self._set_cloexec_flag(r) - self._set_cloexec_flag(w) - return r, w - - - def _close_fds(self, but): - if hasattr(os, 'closerange'): - os.closerange(3, but) - os.closerange(but + 1, MAXFD) - else: - for i in xrange(3, MAXFD): - if i == but: - continue - try: - os.close(i) - except: - pass - - - def _execute_child(self, args, executable, preexec_fn, close_fds, - cwd, env, universal_newlines, - startupinfo, creationflags, shell, to_close, - p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite): - """Execute program (POSIX version)""" - - if isinstance(args, types.StringTypes): - args = [args] - else: - args = list(args) - - if shell: - args = ["/bin/sh", "-c"] + args - if executable: - args[0] = executable - - if executable is None: - executable = args[0] - - def _close_in_parent(fd): - os.close(fd) - to_close.remove(fd) - - # For transferring possible exec failure from child to parent - # The first char specifies the exception type: 0 means - # OSError, 1 means some other error. - errpipe_read, errpipe_write = self.pipe_cloexec() - try: - try: - gc_was_enabled = gc.isenabled() - # Disable gc to avoid bug where gc -> file_dealloc -> - # write to stderr -> hang. http://bugs.python.org/issue1336 - gc.disable() - try: - self.pid = os.fork() - except: - if gc_was_enabled: - gc.enable() - raise - self._child_created = True - if self.pid == 0: - # Child - try: - # Close parent's pipe ends - if p2cwrite is not None: - os.close(p2cwrite) - if c2pread is not None: - os.close(c2pread) - if errread is not None: - os.close(errread) - os.close(errpipe_read) - - # When duping fds, if there arises a situation - # where one of the fds is either 0, 1 or 2, it - # is possible that it is overwritten (#12607). 
- if c2pwrite == 0: - c2pwrite = os.dup(c2pwrite) - if errwrite == 0 or errwrite == 1: - errwrite = os.dup(errwrite) - - # Dup fds for child - def _dup2(a, b): - # dup2() removes the CLOEXEC flag but - # we must do it ourselves if dup2() - # would be a no-op (issue #10806). - if a == b: - self._set_cloexec_flag(a, False) - elif a is not None: - os.dup2(a, b) - _dup2(p2cread, 0) - _dup2(c2pwrite, 1) - _dup2(errwrite, 2) - - # Close pipe fds. Make sure we don't close the - # same fd more than once, or standard fds. - closed = { None } - for fd in [p2cread, c2pwrite, errwrite]: - if fd not in closed and fd > 2: - os.close(fd) - closed.add(fd) - - if cwd is not None: - os.chdir(cwd) - - if preexec_fn: - preexec_fn() - - # Close all other fds, if asked for - after - # preexec_fn(), which may open FDs. - if close_fds: - self._close_fds(but=errpipe_write) - - if env is None: - os.execvp(executable, args) - else: - os.execvpe(executable, args, env) - - except: - exc_type, exc_value, tb = sys.exc_info() - # Save the traceback and attach it to the exception object - exc_lines = traceback.format_exception(exc_type, - exc_value, - tb) - exc_value.child_traceback = ''.join(exc_lines) - os.write(errpipe_write, pickle.dumps(exc_value)) - - # This exitcode won't be reported to applications, so it - # really doesn't matter what we return. 
- os._exit(255) - - # Parent - if gc_was_enabled: - gc.enable() - finally: - # be sure the FD is closed no matter what - os.close(errpipe_write) - - # Wait for exec to fail or succeed; possibly raising exception - data = _eintr_retry_call(os.read, errpipe_read, 1048576) - pickle_bits = [] - while data: - pickle_bits.append(data) - data = _eintr_retry_call(os.read, errpipe_read, 1048576) - data = "".join(pickle_bits) - finally: - if p2cread is not None and p2cwrite is not None: - _close_in_parent(p2cread) - if c2pwrite is not None and c2pread is not None: - _close_in_parent(c2pwrite) - if errwrite is not None and errread is not None: - _close_in_parent(errwrite) - - # be sure the FD is closed no matter what - os.close(errpipe_read) - - if data != "": - try: - _eintr_retry_call(os.waitpid, self.pid, 0) - except OSError as e: - if e.errno != errno.ECHILD: - raise - child_exception = pickle.loads(data) - raise child_exception - - - def _handle_exitstatus(self, sts, _WIFSIGNALED=os.WIFSIGNALED, - _WTERMSIG=os.WTERMSIG, _WIFEXITED=os.WIFEXITED, - _WEXITSTATUS=os.WEXITSTATUS): - # This method is called (indirectly) by __del__, so it cannot - # refer to anything outside of its local scope. - if _WIFSIGNALED(sts): - self.returncode = -_WTERMSIG(sts) - elif _WIFEXITED(sts): - self.returncode = _WEXITSTATUS(sts) - else: - # Should never happen - raise RuntimeError("Unknown child exit status!") - - - def _internal_poll(self, _deadstate=None, _waitpid=os.waitpid, - _WNOHANG=os.WNOHANG, _os_error=os.error, _ECHILD=errno.ECHILD): - """Check if child process has terminated. Returns returncode - attribute. - - This method is called by __del__, so it cannot reference anything - outside of the local scope (nor can any methods it calls). 
- - """ - if self.returncode is None: - try: - pid, sts = _waitpid(self.pid, _WNOHANG) - if pid == self.pid: - self._handle_exitstatus(sts) - except _os_error as e: - if _deadstate is not None: - self.returncode = _deadstate - if e.errno == _ECHILD: - # This happens if SIGCLD is set to be ignored or - # waiting for child processes has otherwise been - # disabled for our process. This child is dead, we - # can't get the status. - # http://bugs.python.org/issue15756 - self.returncode = 0 - return self.returncode - - - def wait(self): - """Wait for child process to terminate. Returns returncode - attribute.""" - while self.returncode is None: - try: - pid, sts = _eintr_retry_call(os.waitpid, self.pid, 0) - except OSError as e: - if e.errno != errno.ECHILD: - raise - # This happens if SIGCLD is set to be ignored or waiting - # for child processes has otherwise been disabled for our - # process. This child is dead, we can't get the status. - pid = self.pid - sts = 0 - # Check the pid and loop as waitpid has been known to return - # 0 even without WNOHANG in odd situations. issue14396. - if pid == self.pid: - self._handle_exitstatus(sts) - return self.returncode - - - def _communicate(self, input): - if self.stdin: - # Flush stdio buffer. This might block, if the user has - # been writing to .stdin in an uncontrolled fashion. - self.stdin.flush() - if not input: - self.stdin.close() - - if _has_poll: - stdout, stderr = self._communicate_with_poll(input) - else: - stdout, stderr = self._communicate_with_select(input) - - # All data exchanged. Translate lists into strings. - if stdout is not None: - stdout = ''.join(stdout) - if stderr is not None: - stderr = ''.join(stderr) - - # Translate newlines, if requested. We cannot let the file - # object do the translation: It is based on stdio, which is - # impossible to combine with select (unless forcing no - # buffering). 
- if self.universal_newlines and hasattr(file, 'newlines'): - if stdout: - stdout = self._translate_newlines(stdout) - if stderr: - stderr = self._translate_newlines(stderr) - - self.wait() - return (stdout, stderr) - - - def _communicate_with_poll(self, input): - stdout = None # Return - stderr = None # Return - fd2file = {} - fd2output = {} - - poller = select.poll() - def register_and_append(file_obj, eventmask): - poller.register(file_obj.fileno(), eventmask) - fd2file[file_obj.fileno()] = file_obj - - def close_unregister_and_remove(fd): - poller.unregister(fd) - fd2file[fd].close() - fd2file.pop(fd) - - if self.stdin and input: - register_and_append(self.stdin, select.POLLOUT) - - select_POLLIN_POLLPRI = select.POLLIN | select.POLLPRI - if self.stdout: - register_and_append(self.stdout, select_POLLIN_POLLPRI) - fd2output[self.stdout.fileno()] = stdout = [] - if self.stderr: - register_and_append(self.stderr, select_POLLIN_POLLPRI) - fd2output[self.stderr.fileno()] = stderr = [] - - input_offset = 0 - while fd2file: - try: - ready = poller.poll() - except select.error, e: - if e.args[0] == errno.EINTR: - continue - raise - - for fd, mode in ready: - if mode & select.POLLOUT: - chunk = input[input_offset : input_offset + _PIPE_BUF] - try: - input_offset += os.write(fd, chunk) - except OSError as e: - if e.errno == errno.EPIPE: - close_unregister_and_remove(fd) - else: - raise - else: - if input_offset >= len(input): - close_unregister_and_remove(fd) - elif mode & select_POLLIN_POLLPRI: - data = os.read(fd, 4096) - if not data: - close_unregister_and_remove(fd) - fd2output[fd].append(data) - else: - # Ignore hang up or errors. 
- close_unregister_and_remove(fd) - - return (stdout, stderr) - - - def _communicate_with_select(self, input): - read_set = [] - write_set = [] - stdout = None # Return - stderr = None # Return - - if self.stdin and input: - write_set.append(self.stdin) - if self.stdout: - read_set.append(self.stdout) - stdout = [] - if self.stderr: - read_set.append(self.stderr) - stderr = [] - - input_offset = 0 - while read_set or write_set: - try: - rlist, wlist, xlist = select.select(read_set, write_set, []) - except select.error, e: - if e.args[0] == errno.EINTR: - continue - raise - - if self.stdin in wlist: - chunk = input[input_offset : input_offset + _PIPE_BUF] - try: - bytes_written = os.write(self.stdin.fileno(), chunk) - except OSError as e: - if e.errno == errno.EPIPE: - self.stdin.close() - write_set.remove(self.stdin) - else: - raise - else: - input_offset += bytes_written - if input_offset >= len(input): - self.stdin.close() - write_set.remove(self.stdin) - - if self.stdout in rlist: - data = os.read(self.stdout.fileno(), 1024) - if data == "": - self.stdout.close() - read_set.remove(self.stdout) - stdout.append(data) - - if self.stderr in rlist: - data = os.read(self.stderr.fileno(), 1024) - if data == "": - self.stderr.close() - read_set.remove(self.stderr) - stderr.append(data) - - return (stdout, stderr) - - - def send_signal(self, sig): - """Send a signal to the process - """ - os.kill(self.pid, sig) - - def terminate(self): - """Terminate the process with SIGTERM - """ - self.send_signal(signal.SIGTERM) - - def kill(self): - """Kill the process with SIGKILL - """ - self.send_signal(signal.SIGKILL) - - -def _demo_posix(): - # - # Example 1: Simple redirection: Get process list - # - plist = Popen(["ps"], stdout=PIPE).communicate()[0] - print "Process list:" - print plist - - # - # Example 2: Change uid before executing child - # - if os.getuid() == 0: - p = Popen(["id"], preexec_fn=lambda: os.setuid(100)) - p.wait() - - # - # Example 3: Connecting several 
subprocesses - # - print "Looking for 'hda'..." - p1 = Popen(["dmesg"], stdout=PIPE) - p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE) - print repr(p2.communicate()[0]) - - # - # Example 4: Catch execution error - # - print - print "Trying a weird file..." - try: - print Popen(["/this/path/does/not/exist"]).communicate() - except OSError, e: - if e.errno == errno.ENOENT: - print "The file didn't exist. I thought so..." - print "Child traceback:" - print e.child_traceback - else: - print "Error", e.errno - else: - print >>sys.stderr, "Gosh. No error." - - -def _demo_windows(): - # - # Example 1: Connecting several subprocesses - # - print "Looking for 'PROMPT' in set output..." - p1 = Popen("set", stdout=PIPE, shell=True) - p2 = Popen('find "PROMPT"', stdin=p1.stdout, stdout=PIPE) - print repr(p2.communicate()[0]) - - # - # Example 2: Simple execution of program - # - print "Executing calc..." - p = Popen("calc") - p.wait() - - -if __name__ == "__main__": - if mswindows: - _demo_windows() - else: - _demo_posix() diff --git a/python/Lib/sunau.py b/python/Lib/sunau.py deleted file mode 100755 index b53044d22b..0000000000 --- a/python/Lib/sunau.py +++ /dev/null @@ -1,493 +0,0 @@ -"""Stuff to parse Sun and NeXT audio files. - -An audio file consists of a header followed by the data. The structure -of the header is as follows. - - +---------------+ - | magic word | - +---------------+ - | header size | - +---------------+ - | data size | - +---------------+ - | encoding | - +---------------+ - | sample rate | - +---------------+ - | # of channels | - +---------------+ - | info | - | | - +---------------+ - -The magic word consists of the 4 characters '.snd'. Apart from the -info field, all header fields are 4 bytes in size. They are all -32-bit unsigned integers encoded in big-endian byte order. - -The header size really gives the start of the data. -The data size is the physical size of the data. 
From the other -parameters the number of frames can be calculated. -The encoding gives the way in which audio samples are encoded. -Possible values are listed below. -The info field currently consists of an ASCII string giving a -human-readable description of the audio file. The info field is -padded with NUL bytes to the header size. - -Usage. - -Reading audio files: - f = sunau.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -When the setpos() and rewind() methods are not used, the seek() -method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' or 'ULAW') - getcompname() -- returns human-readable version of - compression type ('not compressed' matches 'NONE') - getparams() -- returns a tuple consisting of all of the - above in the above order - getmarkers() -- returns None (for compatibility with the - aifc module) - getmark(id) -- raises an error since the mark does not - exist (for compatibility with the aifc module) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell() and the position given to setpos() -are compatible and have nothing to do with the actual position in the -file. -The close() method is called automatically when the class instance -is destroyed. - -Writing audio files: - f = sunau.open(file, 'w') -where file is either the name of a file or an open file pointer. 
-The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple)-- set all parameters at once - tell() -- return current position in output file - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes('') or -close() to patch up the sizes in the header. -The close() method is called automatically when the class instance -is destroyed. 
-""" - -# from -AUDIO_FILE_MAGIC = 0x2e736e64 -AUDIO_FILE_ENCODING_MULAW_8 = 1 -AUDIO_FILE_ENCODING_LINEAR_8 = 2 -AUDIO_FILE_ENCODING_LINEAR_16 = 3 -AUDIO_FILE_ENCODING_LINEAR_24 = 4 -AUDIO_FILE_ENCODING_LINEAR_32 = 5 -AUDIO_FILE_ENCODING_FLOAT = 6 -AUDIO_FILE_ENCODING_DOUBLE = 7 -AUDIO_FILE_ENCODING_ADPCM_G721 = 23 -AUDIO_FILE_ENCODING_ADPCM_G722 = 24 -AUDIO_FILE_ENCODING_ADPCM_G723_3 = 25 -AUDIO_FILE_ENCODING_ADPCM_G723_5 = 26 -AUDIO_FILE_ENCODING_ALAW_8 = 27 - -# from -AUDIO_UNKNOWN_SIZE = 0xFFFFFFFFL # ((unsigned)(~0)) - -_simple_encodings = [AUDIO_FILE_ENCODING_MULAW_8, - AUDIO_FILE_ENCODING_LINEAR_8, - AUDIO_FILE_ENCODING_LINEAR_16, - AUDIO_FILE_ENCODING_LINEAR_24, - AUDIO_FILE_ENCODING_LINEAR_32, - AUDIO_FILE_ENCODING_ALAW_8] - -class Error(Exception): - pass - -def _read_u32(file): - x = 0L - for i in range(4): - byte = file.read(1) - if byte == '': - raise EOFError - x = x*256 + ord(byte) - return x - -def _write_u32(file, x): - data = [] - for i in range(4): - d, m = divmod(x, 256) - data.insert(0, m) - x = d - for i in range(4): - file.write(chr(int(data[i]))) - -class Au_read: - - def __init__(self, f): - if type(f) == type(''): - import __builtin__ - f = __builtin__.open(f, 'rb') - self.initfp(f) - - def __del__(self): - if self._file: - self.close() - - def initfp(self, file): - self._file = file - self._soundpos = 0 - magic = int(_read_u32(file)) - if magic != AUDIO_FILE_MAGIC: - raise Error, 'bad magic number' - self._hdr_size = int(_read_u32(file)) - if self._hdr_size < 24: - raise Error, 'header size too small' - if self._hdr_size > 100: - raise Error, 'header size ridiculously large' - self._data_size = _read_u32(file) - if self._data_size != AUDIO_UNKNOWN_SIZE: - self._data_size = int(self._data_size) - self._encoding = int(_read_u32(file)) - if self._encoding not in _simple_encodings: - raise Error, 'encoding not (yet) supported' - if self._encoding in (AUDIO_FILE_ENCODING_MULAW_8, - AUDIO_FILE_ENCODING_ALAW_8): - self._sampwidth = 2 - 
self._framesize = 1 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_8: - self._framesize = self._sampwidth = 1 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_16: - self._framesize = self._sampwidth = 2 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_24: - self._framesize = self._sampwidth = 3 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_32: - self._framesize = self._sampwidth = 4 - else: - raise Error, 'unknown encoding' - self._framerate = int(_read_u32(file)) - self._nchannels = int(_read_u32(file)) - self._framesize = self._framesize * self._nchannels - if self._hdr_size > 24: - self._info = file.read(self._hdr_size - 24) - for i in range(len(self._info)): - if self._info[i] == '\0': - self._info = self._info[:i] - break - else: - self._info = '' - try: - self._data_pos = file.tell() - except (AttributeError, IOError): - self._data_pos = None - - def getfp(self): - return self._file - - def getnchannels(self): - return self._nchannels - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getnframes(self): - if self._data_size == AUDIO_UNKNOWN_SIZE: - return AUDIO_UNKNOWN_SIZE - if self._encoding in _simple_encodings: - return self._data_size // self._framesize - return 0 # XXX--must do some arithmetic here - - def getcomptype(self): - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - return 'ULAW' - elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8: - return 'ALAW' - else: - return 'NONE' - - def getcompname(self): - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - return 'CCITT G.711 u-law' - elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8: - return 'CCITT G.711 A-law' - else: - return 'not compressed' - - def getparams(self): - return self.getnchannels(), self.getsampwidth(), \ - self.getframerate(), self.getnframes(), \ - self.getcomptype(), self.getcompname() - - def getmarkers(self): - return None - - def getmark(self, id): - raise Error, 'no marks' - - def 
readframes(self, nframes): - if self._encoding in _simple_encodings: - if nframes == AUDIO_UNKNOWN_SIZE: - data = self._file.read() - else: - data = self._file.read(nframes * self._framesize) - self._soundpos += len(data) // self._framesize - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - import audioop - data = audioop.ulaw2lin(data, self._sampwidth) - return data - return None # XXX--not implemented yet - - def rewind(self): - if self._data_pos is None: - raise IOError('cannot seek') - self._file.seek(self._data_pos) - self._soundpos = 0 - - def tell(self): - return self._soundpos - - def setpos(self, pos): - if pos < 0 or pos > self.getnframes(): - raise Error, 'position not in range' - if self._data_pos is None: - raise IOError('cannot seek') - self._file.seek(self._data_pos + pos * self._framesize) - self._soundpos = pos - - def close(self): - self._file = None - -class Au_write: - - def __init__(self, f): - if type(f) == type(''): - import __builtin__ - f = __builtin__.open(f, 'wb') - self.initfp(f) - - def __del__(self): - if self._file: - self.close() - - def initfp(self, file): - self._file = file - self._framerate = 0 - self._nchannels = 0 - self._sampwidth = 0 - self._framesize = 0 - self._nframes = AUDIO_UNKNOWN_SIZE - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._info = '' - self._comptype = 'ULAW' # default is U-law - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if nchannels not in (1, 2, 4): - raise Error, 'only 1, 2, or 4 channels supported' - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error, 'number of channels not set' - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if sampwidth not in (1, 2, 4): - raise Error, 'bad sample width' - self._sampwidth = sampwidth - - def 
getsampwidth(self): - if not self._framerate: - raise Error, 'sample width not specified' - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error, 'frame rate not set' - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error, 'cannot change parameters after starting to write' - if nframes < 0: - raise Error, '# of frames cannot be negative' - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, type, name): - if type in ('NONE', 'ULAW'): - self._comptype = type - else: - raise Error, 'unknown compression type' - - def getcomptype(self): - return self._comptype - - def getcompname(self): - if self._comptype == 'ULAW': - return 'CCITT G.711 u-law' - elif self._comptype == 'ALAW': - return 'CCITT G.711 A-law' - else: - return 'not compressed' - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - return self.getnchannels(), self.getsampwidth(), \ - self.getframerate(), self.getnframes(), \ - self.getcomptype(), self.getcompname() - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - self._ensure_header_written() - if self._comptype == 'ULAW': - import audioop - data = audioop.lin2ulaw(data, self._sampwidth) - nframes = len(data) // self._framesize - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != 
self._datawritten: - self._patchheader() - - def close(self): - if self._file: - try: - self._ensure_header_written() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - self._file.flush() - finally: - self._file = None - - # - # private methods - # - - def _ensure_header_written(self): - if not self._nframeswritten: - if not self._nchannels: - raise Error, '# of channels not specified' - if not self._sampwidth: - raise Error, 'sample width not specified' - if not self._framerate: - raise Error, 'frame rate not specified' - self._write_header() - - def _write_header(self): - if self._comptype == 'NONE': - if self._sampwidth == 1: - encoding = AUDIO_FILE_ENCODING_LINEAR_8 - self._framesize = 1 - elif self._sampwidth == 2: - encoding = AUDIO_FILE_ENCODING_LINEAR_16 - self._framesize = 2 - elif self._sampwidth == 4: - encoding = AUDIO_FILE_ENCODING_LINEAR_32 - self._framesize = 4 - else: - raise Error, 'internal error' - elif self._comptype == 'ULAW': - encoding = AUDIO_FILE_ENCODING_MULAW_8 - self._framesize = 1 - else: - raise Error, 'internal error' - self._framesize = self._framesize * self._nchannels - _write_u32(self._file, AUDIO_FILE_MAGIC) - header_size = 25 + len(self._info) - header_size = (header_size + 7) & ~7 - _write_u32(self._file, header_size) - if self._nframes == AUDIO_UNKNOWN_SIZE: - length = AUDIO_UNKNOWN_SIZE - else: - length = self._nframes * self._framesize - try: - self._form_length_pos = self._file.tell() - except (AttributeError, IOError): - self._form_length_pos = None - _write_u32(self._file, length) - self._datalength = length - _write_u32(self._file, encoding) - _write_u32(self._file, self._framerate) - _write_u32(self._file, self._nchannels) - self._file.write(self._info) - self._file.write('\0'*(header_size - len(self._info) - 24)) - - def _patchheader(self): - if self._form_length_pos is None: - raise IOError('cannot seek') - self._file.seek(self._form_length_pos) - 
_write_u32(self._file, self._datawritten) - self._datalength = self._datawritten - self._file.seek(0, 2) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Au_read(f) - elif mode in ('w', 'wb'): - return Au_write(f) - else: - raise Error, "mode must be 'r', 'rb', 'w', or 'wb'" - -openfp = open diff --git a/python/Lib/sunaudio.py b/python/Lib/sunaudio.py deleted file mode 100755 index 97d7332c48..0000000000 --- a/python/Lib/sunaudio.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Interpret sun audio headers.""" -from warnings import warnpy3k -warnpy3k("the sunaudio module has been removed in Python 3.0; " - "use the sunau module instead", stacklevel=2) -del warnpy3k - - -MAGIC = '.snd' - -class error(Exception): - pass - - -def get_long_be(s): - """Convert a 4-char value to integer.""" - return (ord(s[0])<<24) | (ord(s[1])<<16) | (ord(s[2])<<8) | ord(s[3]) - - -def gethdr(fp): - """Read a sound header from an open file.""" - if fp.read(4) != MAGIC: - raise error, 'gethdr: bad magic word' - hdr_size = get_long_be(fp.read(4)) - data_size = get_long_be(fp.read(4)) - encoding = get_long_be(fp.read(4)) - sample_rate = get_long_be(fp.read(4)) - channels = get_long_be(fp.read(4)) - excess = hdr_size - 24 - if excess < 0: - raise error, 'gethdr: bad hdr_size' - if excess > 0: - info = fp.read(excess) - else: - info = '' - return (data_size, encoding, sample_rate, channels, info) - - -def printhdr(file): - """Read and print the sound header of a named file.""" - hdr = gethdr(open(file, 'r')) - data_size, encoding, sample_rate, channels, info = hdr - while info[-1:] == '\0': - info = info[:-1] - print 'File name: ', file - print 'Data size: ', data_size - print 'Encoding: ', encoding - print 'Sample rate:', sample_rate - print 'Channels: ', channels - print 'Info: ', repr(info) diff --git a/python/Lib/symbol.py b/python/Lib/symbol.py deleted file mode 100755 index b4d4e13ab8..0000000000 --- 
a/python/Lib/symbol.py +++ /dev/null @@ -1,114 +0,0 @@ -#! /usr/bin/env python - -"""Non-terminal symbols of Python grammar (from "graminit.h").""" - -# This file is automatically generated; please don't muck it up! -# -# To update the symbols in this file, 'cd' to the top directory of -# the python source tree after building the interpreter and run: -# -# ./python Lib/symbol.py - -#--start constants-- -single_input = 256 -file_input = 257 -eval_input = 258 -decorator = 259 -decorators = 260 -decorated = 261 -funcdef = 262 -parameters = 263 -varargslist = 264 -fpdef = 265 -fplist = 266 -stmt = 267 -simple_stmt = 268 -small_stmt = 269 -expr_stmt = 270 -augassign = 271 -print_stmt = 272 -del_stmt = 273 -pass_stmt = 274 -flow_stmt = 275 -break_stmt = 276 -continue_stmt = 277 -return_stmt = 278 -yield_stmt = 279 -raise_stmt = 280 -import_stmt = 281 -import_name = 282 -import_from = 283 -import_as_name = 284 -dotted_as_name = 285 -import_as_names = 286 -dotted_as_names = 287 -dotted_name = 288 -global_stmt = 289 -exec_stmt = 290 -assert_stmt = 291 -compound_stmt = 292 -if_stmt = 293 -while_stmt = 294 -for_stmt = 295 -try_stmt = 296 -with_stmt = 297 -with_item = 298 -except_clause = 299 -suite = 300 -testlist_safe = 301 -old_test = 302 -old_lambdef = 303 -test = 304 -or_test = 305 -and_test = 306 -not_test = 307 -comparison = 308 -comp_op = 309 -expr = 310 -xor_expr = 311 -and_expr = 312 -shift_expr = 313 -arith_expr = 314 -term = 315 -factor = 316 -power = 317 -atom = 318 -listmaker = 319 -testlist_comp = 320 -lambdef = 321 -trailer = 322 -subscriptlist = 323 -subscript = 324 -sliceop = 325 -exprlist = 326 -testlist = 327 -dictorsetmaker = 328 -classdef = 329 -arglist = 330 -argument = 331 -list_iter = 332 -list_for = 333 -list_if = 334 -comp_iter = 335 -comp_for = 336 -comp_if = 337 -testlist1 = 338 -encoding_decl = 339 -yield_expr = 340 -#--end constants-- - -sym_name = {} -for _name, _value in globals().items(): - if type(_value) is type(0): - sym_name[_value] = 
_name - - -def main(): - import sys - import token - if len(sys.argv) == 1: - sys.argv = sys.argv + ["Include/graminit.h", "Lib/symbol.py"] - token.main() - -if __name__ == "__main__": - main() diff --git a/python/Lib/symtable.py b/python/Lib/symtable.py deleted file mode 100755 index 0ba9d1af34..0000000000 --- a/python/Lib/symtable.py +++ /dev/null @@ -1,239 +0,0 @@ -"""Interface to the compiler's internal symbol tables""" - -import _symtable -from _symtable import (USE, DEF_GLOBAL, DEF_LOCAL, DEF_PARAM, - DEF_IMPORT, DEF_BOUND, OPT_IMPORT_STAR, OPT_EXEC, OPT_BARE_EXEC, - SCOPE_OFF, SCOPE_MASK, FREE, GLOBAL_IMPLICIT, GLOBAL_EXPLICIT, CELL, LOCAL) - -import weakref - -__all__ = ["symtable", "SymbolTable", "Class", "Function", "Symbol"] - -def symtable(code, filename, compile_type): - top = _symtable.symtable(code, filename, compile_type) - return _newSymbolTable(top, filename) - -class SymbolTableFactory: - def __init__(self): - self.__memo = weakref.WeakValueDictionary() - - def new(self, table, filename): - if table.type == _symtable.TYPE_FUNCTION: - return Function(table, filename) - if table.type == _symtable.TYPE_CLASS: - return Class(table, filename) - return SymbolTable(table, filename) - - def __call__(self, table, filename): - key = table, filename - obj = self.__memo.get(key, None) - if obj is None: - obj = self.__memo[key] = self.new(table, filename) - return obj - -_newSymbolTable = SymbolTableFactory() - - -class SymbolTable(object): - - def __init__(self, raw_table, filename): - self._table = raw_table - self._filename = filename - self._symbols = {} - - def __repr__(self): - if self.__class__ == SymbolTable: - kind = "" - else: - kind = "%s " % self.__class__.__name__ - - if self._table.name == "global": - return "<{0}SymbolTable for module {1}>".format(kind, self._filename) - else: - return "<{0}SymbolTable for {1} in {2}>".format(kind, - self._table.name, - self._filename) - - def get_type(self): - if self._table.type == _symtable.TYPE_MODULE: - 
return "module" - if self._table.type == _symtable.TYPE_FUNCTION: - return "function" - if self._table.type == _symtable.TYPE_CLASS: - return "class" - assert self._table.type in (1, 2, 3), \ - "unexpected type: {0}".format(self._table.type) - - def get_id(self): - return self._table.id - - def get_name(self): - return self._table.name - - def get_lineno(self): - return self._table.lineno - - def is_optimized(self): - return bool(self._table.type == _symtable.TYPE_FUNCTION - and not self._table.optimized) - - def is_nested(self): - return bool(self._table.nested) - - def has_children(self): - return bool(self._table.children) - - def has_exec(self): - """Return true if the scope uses exec""" - return bool(self._table.optimized & (OPT_EXEC | OPT_BARE_EXEC)) - - def has_import_star(self): - """Return true if the scope uses import *""" - return bool(self._table.optimized & OPT_IMPORT_STAR) - - def get_identifiers(self): - return self._table.symbols.keys() - - def lookup(self, name): - sym = self._symbols.get(name) - if sym is None: - flags = self._table.symbols[name] - namespaces = self.__check_children(name) - sym = self._symbols[name] = Symbol(name, flags, namespaces) - return sym - - def get_symbols(self): - return [self.lookup(ident) for ident in self.get_identifiers()] - - def __check_children(self, name): - return [_newSymbolTable(st, self._filename) - for st in self._table.children - if st.name == name] - - def get_children(self): - return [_newSymbolTable(st, self._filename) - for st in self._table.children] - - -class Function(SymbolTable): - - # Default values for instance variables - __params = None - __locals = None - __frees = None - __globals = None - - def __idents_matching(self, test_func): - return tuple([ident for ident in self.get_identifiers() - if test_func(self._table.symbols[ident])]) - - def get_parameters(self): - if self.__params is None: - self.__params = self.__idents_matching(lambda x:x & DEF_PARAM) - return self.__params - - def 
get_locals(self): - if self.__locals is None: - locs = (LOCAL, CELL) - test = lambda x: ((x >> SCOPE_OFF) & SCOPE_MASK) in locs - self.__locals = self.__idents_matching(test) - return self.__locals - - def get_globals(self): - if self.__globals is None: - glob = (GLOBAL_IMPLICIT, GLOBAL_EXPLICIT) - test = lambda x:((x >> SCOPE_OFF) & SCOPE_MASK) in glob - self.__globals = self.__idents_matching(test) - return self.__globals - - def get_frees(self): - if self.__frees is None: - is_free = lambda x:((x >> SCOPE_OFF) & SCOPE_MASK) == FREE - self.__frees = self.__idents_matching(is_free) - return self.__frees - - -class Class(SymbolTable): - - __methods = None - - def get_methods(self): - if self.__methods is None: - d = {} - for st in self._table.children: - d[st.name] = 1 - self.__methods = tuple(d) - return self.__methods - - -class Symbol(object): - - def __init__(self, name, flags, namespaces=None): - self.__name = name - self.__flags = flags - self.__scope = (flags >> SCOPE_OFF) & SCOPE_MASK # like PyST_GetScope() - self.__namespaces = namespaces or () - - def __repr__(self): - return "".format(self.__name) - - def get_name(self): - return self.__name - - def is_referenced(self): - return bool(self.__flags & _symtable.USE) - - def is_parameter(self): - return bool(self.__flags & DEF_PARAM) - - def is_global(self): - return bool(self.__scope in (GLOBAL_IMPLICIT, GLOBAL_EXPLICIT)) - - def is_declared_global(self): - return bool(self.__scope == GLOBAL_EXPLICIT) - - def is_local(self): - return bool(self.__flags & DEF_BOUND) - - def is_free(self): - return bool(self.__scope == FREE) - - def is_imported(self): - return bool(self.__flags & DEF_IMPORT) - - def is_assigned(self): - return bool(self.__flags & DEF_LOCAL) - - def is_namespace(self): - """Returns true if name binding introduces new namespace. - - If the name is used as the target of a function or class - statement, this will be true. - - Note that a single name can be bound to multiple objects. 
If - is_namespace() is true, the name may also be bound to other - objects, like an int or list, that does not introduce a new - namespace. - """ - return bool(self.__namespaces) - - def get_namespaces(self): - """Return a list of namespaces bound to this name""" - return self.__namespaces - - def get_namespace(self): - """Returns the single namespace bound to this name. - - Raises ValueError if the name is bound to multiple namespaces. - """ - if len(self.__namespaces) != 1: - raise ValueError, "name is bound to multiple namespaces" - return self.__namespaces[0] - -if __name__ == "__main__": - import os, sys - src = open(sys.argv[0]).read() - mod = symtable(src, os.path.split(sys.argv[0])[1], "exec") - for ident in mod.get_identifiers(): - info = mod.lookup(ident) - print info, info.is_local(), info.is_namespace() diff --git a/python/Lib/sysconfig.py b/python/Lib/sysconfig.py deleted file mode 100644 index 2a1da5a03b..0000000000 --- a/python/Lib/sysconfig.py +++ /dev/null @@ -1,640 +0,0 @@ -"""Provide access to Python's configuration information. 
- -""" -import sys -import os -from os.path import pardir, realpath - -_INSTALL_SCHEMES = { - 'posix_prefix': { - 'stdlib': '{base}/lib/python{py_version_short}', - 'platstdlib': '{platbase}/lib/python{py_version_short}', - 'purelib': '{base}/lib/python{py_version_short}/site-packages', - 'platlib': '{platbase}/lib/python{py_version_short}/site-packages', - 'include': '{base}/include/python{py_version_short}', - 'platinclude': '{platbase}/include/python{py_version_short}', - 'scripts': '{base}/bin', - 'data': '{base}', - }, - 'posix_home': { - 'stdlib': '{base}/lib/python', - 'platstdlib': '{base}/lib/python', - 'purelib': '{base}/lib/python', - 'platlib': '{base}/lib/python', - 'include': '{base}/include/python', - 'platinclude': '{base}/include/python', - 'scripts': '{base}/bin', - 'data' : '{base}', - }, - 'nt': { - 'stdlib': '{base}/Lib', - 'platstdlib': '{base}/Lib', - 'purelib': '{base}/Lib/site-packages', - 'platlib': '{base}/Lib/site-packages', - 'include': '{base}/Include', - 'platinclude': '{base}/Include', - 'scripts': '{base}/Scripts', - 'data' : '{base}', - }, - 'os2': { - 'stdlib': '{base}/Lib', - 'platstdlib': '{base}/Lib', - 'purelib': '{base}/Lib/site-packages', - 'platlib': '{base}/Lib/site-packages', - 'include': '{base}/Include', - 'platinclude': '{base}/Include', - 'scripts': '{base}/Scripts', - 'data' : '{base}', - }, - 'os2_home': { - 'stdlib': '{userbase}/lib/python{py_version_short}', - 'platstdlib': '{userbase}/lib/python{py_version_short}', - 'purelib': '{userbase}/lib/python{py_version_short}/site-packages', - 'platlib': '{userbase}/lib/python{py_version_short}/site-packages', - 'include': '{userbase}/include/python{py_version_short}', - 'scripts': '{userbase}/bin', - 'data' : '{userbase}', - }, - 'nt_user': { - 'stdlib': '{userbase}/Python{py_version_nodot}', - 'platstdlib': '{userbase}/Python{py_version_nodot}', - 'purelib': '{userbase}/Python{py_version_nodot}/site-packages', - 'platlib': 
'{userbase}/Python{py_version_nodot}/site-packages', - 'include': '{userbase}/Python{py_version_nodot}/Include', - 'scripts': '{userbase}/Scripts', - 'data' : '{userbase}', - }, - 'posix_user': { - 'stdlib': '{userbase}/lib/python{py_version_short}', - 'platstdlib': '{userbase}/lib/python{py_version_short}', - 'purelib': '{userbase}/lib/python{py_version_short}/site-packages', - 'platlib': '{userbase}/lib/python{py_version_short}/site-packages', - 'include': '{userbase}/include/python{py_version_short}', - 'scripts': '{userbase}/bin', - 'data' : '{userbase}', - }, - 'osx_framework_user': { - 'stdlib': '{userbase}/lib/python', - 'platstdlib': '{userbase}/lib/python', - 'purelib': '{userbase}/lib/python/site-packages', - 'platlib': '{userbase}/lib/python/site-packages', - 'include': '{userbase}/include', - 'scripts': '{userbase}/bin', - 'data' : '{userbase}', - }, - } - -_SCHEME_KEYS = ('stdlib', 'platstdlib', 'purelib', 'platlib', 'include', - 'scripts', 'data') -_PY_VERSION = sys.version.split()[0] -_PY_VERSION_SHORT = sys.version[:3] -_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2] -_PREFIX = os.path.normpath(sys.prefix) -_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) -_CONFIG_VARS = None -_USER_BASE = None - -def _safe_realpath(path): - try: - return realpath(path) - except OSError: - return path - -if sys.executable: - _PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable)) -else: - # sys.executable can be empty if argv[0] has been changed and Python is - # unable to retrieve the real program name - _PROJECT_BASE = _safe_realpath(os.getcwd()) - -if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir)) -# PC/VS7.1 -if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) -# PC/AMD64 -if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower(): - _PROJECT_BASE = 
_safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) - -# set for cross builds -if "_PYTHON_PROJECT_BASE" in os.environ: - # the build directory for posix builds - _PROJECT_BASE = os.path.normpath(os.path.abspath(".")) -def is_python_build(): - for fn in ("Setup.dist", "Setup.local"): - if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)): - return True - return False - -_PYTHON_BUILD = is_python_build() - -if _PYTHON_BUILD: - for scheme in ('posix_prefix', 'posix_home'): - _INSTALL_SCHEMES[scheme]['include'] = '{projectbase}/Include' - _INSTALL_SCHEMES[scheme]['platinclude'] = '{srcdir}' - -def _subst_vars(s, local_vars): - try: - return s.format(**local_vars) - except KeyError: - try: - return s.format(**os.environ) - except KeyError, var: - raise AttributeError('{%s}' % var) - -def _extend_dict(target_dict, other_dict): - target_keys = target_dict.keys() - for key, value in other_dict.items(): - if key in target_keys: - continue - target_dict[key] = value - -def _expand_vars(scheme, vars): - res = {} - if vars is None: - vars = {} - _extend_dict(vars, get_config_vars()) - - for key, value in _INSTALL_SCHEMES[scheme].items(): - if os.name in ('posix', 'nt'): - value = os.path.expanduser(value) - res[key] = os.path.normpath(_subst_vars(value, vars)) - return res - -def _get_default_scheme(): - if os.name == 'posix': - # the default scheme for posix is posix_prefix - return 'posix_prefix' - return os.name - -def _getuserbase(): - env_base = os.environ.get("PYTHONUSERBASE", None) - def joinuser(*args): - return os.path.expanduser(os.path.join(*args)) - - # what about 'os2emx', 'riscos' ? 
- if os.name == "nt": - base = os.environ.get("APPDATA") or "~" - return env_base if env_base else joinuser(base, "Python") - - if sys.platform == "darwin": - framework = get_config_var("PYTHONFRAMEWORK") - if framework: - return env_base if env_base else \ - joinuser("~", "Library", framework, "%d.%d" - % (sys.version_info[:2])) - - return env_base if env_base else joinuser("~", ".local") - - -def _parse_makefile(filename, vars=None): - """Parse a Makefile-style file. - - A dictionary containing name/value pairs is returned. If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - import re - # Regexes needed for parsing Makefile (and similar syntaxes, - # like old-style Setup files). - _variable_rx = re.compile("([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") - _findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") - _findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") - - if vars is None: - vars = {} - done = {} - notdone = {} - - with open(filename) as f: - lines = f.readlines() - - for line in lines: - if line.startswith('#') or line.strip() == '': - continue - m = _variable_rx.match(line) - if m: - n, v = m.group(1, 2) - v = v.strip() - # `$$' is a literal `$' in make - tmpv = v.replace('$$', '') - - if "$" in tmpv: - notdone[n] = v - else: - try: - v = int(v) - except ValueError: - # insert literal `$' - done[n] = v.replace('$$', '$') - else: - done[n] = v - - # do variable interpolation here - while notdone: - for name in notdone.keys(): - value = notdone[name] - m = _findvar1_rx.search(value) or _findvar2_rx.search(value) - if m: - n = m.group(1) - found = True - if n in done: - item = str(done[n]) - elif n in notdone: - # get it on a subsequent round - found = False - elif n in os.environ: - # do it like make: fall back to environment - item = os.environ[n] - else: - done[n] = item = "" - if found: - after = value[m.end():] - value = value[:m.start()] + item + after - if "$" in after: - 
notdone[name] = value - else: - try: value = int(value) - except ValueError: - done[name] = value.strip() - else: - done[name] = value - del notdone[name] - else: - # bogus variable reference; just drop it since we can't deal - del notdone[name] - # strip spurious spaces - for k, v in done.items(): - if isinstance(v, str): - done[k] = v.strip() - - # save the results in the global dictionary - vars.update(done) - return vars - - -def get_makefile_filename(): - """Return the path of the Makefile.""" - if _PYTHON_BUILD: - return os.path.join(_PROJECT_BASE, "Makefile") - return os.path.join(get_path('platstdlib'), "config", "Makefile") - -# Issue #22199: retain undocumented private name for compatibility -_get_makefile_filename = get_makefile_filename - -def _generate_posix_vars(): - """Generate the Python module containing build-time variables.""" - import pprint - vars = {} - # load the installed Makefile: - makefile = get_makefile_filename() - try: - _parse_makefile(makefile, vars) - except IOError, e: - msg = "invalid Python installation: unable to open %s" % makefile - if hasattr(e, "strerror"): - msg = msg + " (%s)" % e.strerror - raise IOError(msg) - - # load the installed pyconfig.h: - config_h = get_config_h_filename() - try: - with open(config_h) as f: - parse_config_h(f, vars) - except IOError, e: - msg = "invalid Python installation: unable to open %s" % config_h - if hasattr(e, "strerror"): - msg = msg + " (%s)" % e.strerror - raise IOError(msg) - - # On AIX, there are wrong paths to the linker scripts in the Makefile - # -- these paths are relative to the Python source, but when installed - # the scripts are in another directory. 
- if _PYTHON_BUILD: - vars['LDSHARED'] = vars['BLDSHARED'] - - # There's a chicken-and-egg situation on OS X with regards to the - # _sysconfigdata module after the changes introduced by #15298: - # get_config_vars() is called by get_platform() as part of the - # `make pybuilddir.txt` target -- which is a precursor to the - # _sysconfigdata.py module being constructed. Unfortunately, - # get_config_vars() eventually calls _init_posix(), which attempts - # to import _sysconfigdata, which we won't have built yet. In order - # for _init_posix() to work, if we're on Darwin, just mock up the - # _sysconfigdata module manually and populate it with the build vars. - # This is more than sufficient for ensuring the subsequent call to - # get_platform() succeeds. - name = '_sysconfigdata' - if 'darwin' in sys.platform: - import imp - module = imp.new_module(name) - module.build_time_vars = vars - sys.modules[name] = module - - pybuilddir = 'build/lib.%s-%s' % (get_platform(), sys.version[:3]) - if hasattr(sys, "gettotalrefcount"): - pybuilddir += '-pydebug' - try: - os.makedirs(pybuilddir) - except OSError: - pass - destfile = os.path.join(pybuilddir, name + '.py') - - with open(destfile, 'wb') as f: - f.write('# system configuration generated and used by' - ' the sysconfig module\n') - f.write('build_time_vars = ') - pprint.pprint(vars, stream=f) - - # Create file used for sys.path fixup -- see Modules/getpath.c - with open('pybuilddir.txt', 'w') as f: - f.write(pybuilddir) - -def _init_posix(vars): - """Initialize the module as appropriate for POSIX systems.""" - # _sysconfigdata is generated at build time, see _generate_posix_vars() - from _sysconfigdata import build_time_vars - vars.update(build_time_vars) - -def _init_non_posix(vars): - """Initialize the module as appropriate for NT""" - # set basic install directories - vars['LIBDEST'] = get_path('stdlib') - vars['BINLIBDEST'] = get_path('platstdlib') - vars['INCLUDEPY'] = get_path('include') - vars['SO'] = '.pyd' - 
vars['EXE'] = '.exe' - vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT - vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable)) - -# -# public APIs -# - - -def parse_config_h(fp, vars=None): - """Parse a config.h-style file. - - A dictionary containing name/value pairs is returned. If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - import re - if vars is None: - vars = {} - define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n") - undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") - - while True: - line = fp.readline() - if not line: - break - m = define_rx.match(line) - if m: - n, v = m.group(1, 2) - try: v = int(v) - except ValueError: pass - vars[n] = v - else: - m = undef_rx.match(line) - if m: - vars[m.group(1)] = 0 - return vars - -def get_config_h_filename(): - """Returns the path of pyconfig.h.""" - if _PYTHON_BUILD: - if os.name == "nt": - inc_dir = os.path.join(_PROJECT_BASE, "PC") - else: - inc_dir = _PROJECT_BASE - else: - inc_dir = get_path('platinclude') - return os.path.join(inc_dir, 'pyconfig.h') - -def get_scheme_names(): - """Returns a tuple containing the schemes names.""" - schemes = _INSTALL_SCHEMES.keys() - schemes.sort() - return tuple(schemes) - -def get_path_names(): - """Returns a tuple containing the paths names.""" - return _SCHEME_KEYS - -def get_paths(scheme=_get_default_scheme(), vars=None, expand=True): - """Returns a mapping containing an install scheme. - - ``scheme`` is the install scheme name. If not provided, it will - return the default scheme for the current platform. - """ - if expand: - return _expand_vars(scheme, vars) - else: - return _INSTALL_SCHEMES[scheme] - -def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True): - """Returns a path corresponding to the scheme. - - ``scheme`` is the install scheme name. 
- """ - return get_paths(scheme, vars, expand)[name] - -def get_config_vars(*args): - """With no arguments, return a dictionary of all configuration - variables relevant for the current platform. - - On Unix, this means every variable defined in Python's installed Makefile; - On Windows and Mac OS it's a much smaller set. - - With arguments, return a list of values that result from looking up - each argument in the configuration variable dictionary. - """ - import re - global _CONFIG_VARS - if _CONFIG_VARS is None: - _CONFIG_VARS = {} - # Normalized versions of prefix and exec_prefix are handy to have; - # in fact, these are the standard versions used most places in the - # Distutils. - _CONFIG_VARS['prefix'] = _PREFIX - _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX - _CONFIG_VARS['py_version'] = _PY_VERSION - _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT - _CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2] - _CONFIG_VARS['base'] = _PREFIX - _CONFIG_VARS['platbase'] = _EXEC_PREFIX - _CONFIG_VARS['projectbase'] = _PROJECT_BASE - - if os.name in ('nt', 'os2'): - _init_non_posix(_CONFIG_VARS) - if os.name == 'posix': - _init_posix(_CONFIG_VARS) - - # Setting 'userbase' is done below the call to the - # init function to enable using 'get_config_var' in - # the init-function. - _CONFIG_VARS['userbase'] = _getuserbase() - - if 'srcdir' not in _CONFIG_VARS: - _CONFIG_VARS['srcdir'] = _PROJECT_BASE - - # Convert srcdir into an absolute path if it appears necessary. - # Normally it is relative to the build directory. However, during - # testing, for example, we might be running a non-installed python - # from a different directory. - if _PYTHON_BUILD and os.name == "posix": - base = _PROJECT_BASE - try: - cwd = os.getcwd() - except OSError: - cwd = None - if (not os.path.isabs(_CONFIG_VARS['srcdir']) and - base != cwd): - # srcdir is relative and we are not in the same directory - # as the executable. 
Assume executable is in the build - # directory and make srcdir absolute. - srcdir = os.path.join(base, _CONFIG_VARS['srcdir']) - _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir) - - # OS X platforms require special customization to handle - # multi-architecture, multi-os-version installers - if sys.platform == 'darwin': - import _osx_support - _osx_support.customize_config_vars(_CONFIG_VARS) - - if args: - vals = [] - for name in args: - vals.append(_CONFIG_VARS.get(name)) - return vals - else: - return _CONFIG_VARS - -def get_config_var(name): - """Return the value of a single variable using the dictionary returned by - 'get_config_vars()'. - - Equivalent to get_config_vars().get(name) - """ - return get_config_vars().get(name) - -def get_platform(): - """Return a string that identifies the current platform. - - This is used mainly to distinguish platform-specific build directories and - platform-specific built distributions. Typically includes the OS name - and version and the architecture (as supplied by 'os.uname()'), - although the exact information included depends on the OS; eg. for IRIX - the architecture isn't particularly important (IRIX only runs on SGI - hardware), but for Linux the kernel version isn't particularly - important. - - Examples of returned values: - linux-i586 - linux-alpha (?) - solaris-2.6-sun4u - irix-5.3 - irix64-6.2 - - Windows will return one of: - win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc) - win-ia64 (64bit Windows on Itanium) - win32 (all others - specifically, sys.platform is returned) - - For other non-POSIX platforms, currently just returns 'sys.platform'. - """ - import re - if os.name == 'nt': - # sniff sys.version for architecture. 
- prefix = " bit (" - i = sys.version.find(prefix) - if i == -1: - return sys.platform - j = sys.version.find(")", i) - look = sys.version[i+len(prefix):j].lower() - if look == 'amd64': - return 'win-amd64' - if look == 'itanium': - return 'win-ia64' - return sys.platform - - # Set for cross builds explicitly - if "_PYTHON_HOST_PLATFORM" in os.environ: - return os.environ["_PYTHON_HOST_PLATFORM"] - - if os.name != "posix" or not hasattr(os, 'uname'): - # XXX what about the architecture? NT is Intel or Alpha, - # Mac OS is M68k or PPC, etc. - return sys.platform - - # Try to distinguish various flavours of Unix - osname, host, release, version, machine = os.uname() - - # Convert the OS name to lowercase, remove '/' characters - # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh") - osname = osname.lower().replace('/', '') - machine = machine.replace(' ', '_') - machine = machine.replace('/', '-') - - if osname[:5] == "linux": - # At least on Linux/Intel, 'machine' is the processor -- - # i386, etc. - # XXX what about Alpha, SPARC, etc? - return "%s-%s" % (osname, machine) - elif osname[:5] == "sunos": - if release[0] >= "5": # SunOS 5 == Solaris 2 - osname = "solaris" - release = "%d.%s" % (int(release[0]) - 3, release[2:]) - # We can't use "platform.architecture()[0]" because a - # bootstrap problem. We use a dict to get an error - # if some suspicious happens. - bitness = {2147483647:"32bit", 9223372036854775807:"64bit"} - machine += ".%s" % bitness[sys.maxint] - # fall through to standard osname-release-machine representation - elif osname[:4] == "irix": # could be "irix64"! 
- return "%s-%s" % (osname, release) - elif osname[:3] == "aix": - return "%s-%s.%s" % (osname, version, release) - elif osname[:6] == "cygwin": - osname = "cygwin" - rel_re = re.compile (r'[\d.]+') - m = rel_re.match(release) - if m: - release = m.group() - elif osname[:6] == "darwin": - import _osx_support - osname, release, machine = _osx_support.get_platform_osx( - get_config_vars(), - osname, release, machine) - - return "%s-%s-%s" % (osname, release, machine) - - -def get_python_version(): - return _PY_VERSION_SHORT - - -def _print_dict(title, data): - for index, (key, value) in enumerate(sorted(data.items())): - if index == 0: - print '%s: ' % (title) - print '\t%s = "%s"' % (key, value) - - -def _main(): - """Display all information sysconfig detains.""" - if '--generate-posix-vars' in sys.argv: - _generate_posix_vars() - return - print 'Platform: "%s"' % get_platform() - print 'Python version: "%s"' % get_python_version() - print 'Current installation scheme: "%s"' % _get_default_scheme() - print - _print_dict('Paths', get_paths()) - print - _print_dict('Variables', get_config_vars()) - - -if __name__ == '__main__': - _main() diff --git a/python/Lib/tabnanny.py b/python/Lib/tabnanny.py deleted file mode 100755 index 76665ac91a..0000000000 --- a/python/Lib/tabnanny.py +++ /dev/null @@ -1,329 +0,0 @@ -#! /usr/bin/env python - -"""The Tab Nanny despises ambiguous indentation. She knows no mercy. - -tabnanny -- Detection of ambiguous indentation - -For the time being this module is intended to be called as a script. -However it is possible to import it into an IDE and use the function -check() described below. - -Warning: The API provided by this module is likely to change in future -releases; such changes may not be backward compatible. -""" - -# Released to the public domain, by Tim Peters, 15 April 1998. - -# XXX Note: this is now a standard library module. -# XXX The API needs to undergo changes however; the current code is too -# XXX script-like. 
This will be addressed later. - -__version__ = "6" - -import os -import sys -import getopt -import tokenize -if not hasattr(tokenize, 'NL'): - raise ValueError("tokenize.NL doesn't exist -- tokenize module too old") - -__all__ = ["check", "NannyNag", "process_tokens"] - -verbose = 0 -filename_only = 0 - -def errprint(*args): - sep = "" - for arg in args: - sys.stderr.write(sep + str(arg)) - sep = " " - sys.stderr.write("\n") - -def main(): - global verbose, filename_only - try: - opts, args = getopt.getopt(sys.argv[1:], "qv") - except getopt.error, msg: - errprint(msg) - return - for o, a in opts: - if o == '-q': - filename_only = filename_only + 1 - if o == '-v': - verbose = verbose + 1 - if not args: - errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...") - return - for arg in args: - check(arg) - -class NannyNag(Exception): - """ - Raised by tokeneater() if detecting an ambiguous indent. - Captured and handled in check(). - """ - def __init__(self, lineno, msg, line): - self.lineno, self.msg, self.line = lineno, msg, line - def get_lineno(self): - return self.lineno - def get_msg(self): - return self.msg - def get_line(self): - return self.line - -def check(file): - """check(file_or_dir) - - If file_or_dir is a directory and not a symbolic link, then recursively - descend the directory tree named by file_or_dir, checking all .py files - along the way. If file_or_dir is an ordinary Python source file, it is - checked for whitespace related problems. The diagnostic messages are - written to standard output using the print statement. 
- """ - - if os.path.isdir(file) and not os.path.islink(file): - if verbose: - print "%r: listing directory" % (file,) - names = os.listdir(file) - for name in names: - fullname = os.path.join(file, name) - if (os.path.isdir(fullname) and - not os.path.islink(fullname) or - os.path.normcase(name[-3:]) == ".py"): - check(fullname) - return - - try: - f = open(file) - except IOError, msg: - errprint("%r: I/O Error: %s" % (file, msg)) - return - - if verbose > 1: - print "checking %r ..." % file - - try: - process_tokens(tokenize.generate_tokens(f.readline)) - - except tokenize.TokenError, msg: - errprint("%r: Token Error: %s" % (file, msg)) - return - - except IndentationError, msg: - errprint("%r: Indentation Error: %s" % (file, msg)) - return - - except NannyNag, nag: - badline = nag.get_lineno() - line = nag.get_line() - if verbose: - print "%r: *** Line %d: trouble in tab city! ***" % (file, badline) - print "offending line: %r" % (line,) - print nag.get_msg() - else: - if ' ' in file: file = '"' + file + '"' - if filename_only: print file - else: print file, badline, repr(line) - return - - if verbose: - print "%r: Clean bill of health." % (file,) - -class Whitespace: - # the characters used for space and tab - S, T = ' \t' - - # members: - # raw - # the original string - # n - # the number of leading whitespace characters in raw - # nt - # the number of tabs in raw[:n] - # norm - # the normal form as a pair (count, trailing), where: - # count - # a tuple such that raw[:n] contains count[i] - # instances of S * i + T - # trailing - # the number of trailing spaces in raw[:n] - # It's A Theorem that m.indent_level(t) == - # n.indent_level(t) for all t >= 1 iff m.norm == n.norm. 
- # is_simple - # true iff raw[:n] is of the form (T*)(S*) - - def __init__(self, ws): - self.raw = ws - S, T = Whitespace.S, Whitespace.T - count = [] - b = n = nt = 0 - for ch in self.raw: - if ch == S: - n = n + 1 - b = b + 1 - elif ch == T: - n = n + 1 - nt = nt + 1 - if b >= len(count): - count = count + [0] * (b - len(count) + 1) - count[b] = count[b] + 1 - b = 0 - else: - break - self.n = n - self.nt = nt - self.norm = tuple(count), b - self.is_simple = len(count) <= 1 - - # return length of longest contiguous run of spaces (whether or not - # preceding a tab) - def longest_run_of_spaces(self): - count, trailing = self.norm - return max(len(count)-1, trailing) - - def indent_level(self, tabsize): - # count, il = self.norm - # for i in range(len(count)): - # if count[i]: - # il = il + (i/tabsize + 1)*tabsize * count[i] - # return il - - # quicker: - # il = trailing + sum (i/ts + 1)*ts*count[i] = - # trailing + ts * sum (i/ts + 1)*count[i] = - # trailing + ts * sum i/ts*count[i] + count[i] = - # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] = - # trailing + ts * [(sum i/ts*count[i]) + num_tabs] - # and note that i/ts*count[i] is 0 when i < ts - - count, trailing = self.norm - il = 0 - for i in range(tabsize, len(count)): - il = il + i/tabsize * count[i] - return trailing + tabsize * (il + self.nt) - - # return true iff self.indent_level(t) == other.indent_level(t) - # for all t >= 1 - def equal(self, other): - return self.norm == other.norm - - # return a list of tuples (ts, i1, i2) such that - # i1 == self.indent_level(ts) != other.indent_level(ts) == i2. - # Intended to be used after not self.equal(other) is known, in which - # case it will return at least one witnessing tab size. 
- def not_equal_witness(self, other): - n = max(self.longest_run_of_spaces(), - other.longest_run_of_spaces()) + 1 - a = [] - for ts in range(1, n+1): - if self.indent_level(ts) != other.indent_level(ts): - a.append( (ts, - self.indent_level(ts), - other.indent_level(ts)) ) - return a - - # Return True iff self.indent_level(t) < other.indent_level(t) - # for all t >= 1. - # The algorithm is due to Vincent Broman. - # Easy to prove it's correct. - # XXXpost that. - # Trivial to prove n is sharp (consider T vs ST). - # Unknown whether there's a faster general way. I suspected so at - # first, but no longer. - # For the special (but common!) case where M and N are both of the - # form (T*)(S*), M.less(N) iff M.len() < N.len() and - # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded. - # XXXwrite that up. - # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1. - def less(self, other): - if self.n >= other.n: - return False - if self.is_simple and other.is_simple: - return self.nt <= other.nt - n = max(self.longest_run_of_spaces(), - other.longest_run_of_spaces()) + 1 - # the self.n >= other.n test already did it for ts=1 - for ts in range(2, n+1): - if self.indent_level(ts) >= other.indent_level(ts): - return False - return True - - # return a list of tuples (ts, i1, i2) such that - # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2. - # Intended to be used after not self.less(other) is known, in which - # case it will return at least one witnessing tab size. 
- def not_less_witness(self, other): - n = max(self.longest_run_of_spaces(), - other.longest_run_of_spaces()) + 1 - a = [] - for ts in range(1, n+1): - if self.indent_level(ts) >= other.indent_level(ts): - a.append( (ts, - self.indent_level(ts), - other.indent_level(ts)) ) - return a - -def format_witnesses(w): - firsts = map(lambda tup: str(tup[0]), w) - prefix = "at tab size" - if len(w) > 1: - prefix = prefix + "s" - return prefix + " " + ', '.join(firsts) - -def process_tokens(tokens): - INDENT = tokenize.INDENT - DEDENT = tokenize.DEDENT - NEWLINE = tokenize.NEWLINE - JUNK = tokenize.COMMENT, tokenize.NL - indents = [Whitespace("")] - check_equal = 0 - - for (type, token, start, end, line) in tokens: - if type == NEWLINE: - # a program statement, or ENDMARKER, will eventually follow, - # after some (possibly empty) run of tokens of the form - # (NL | COMMENT)* (INDENT | DEDENT+)? - # If an INDENT appears, setting check_equal is wrong, and will - # be undone when we see the INDENT. - check_equal = 1 - - elif type == INDENT: - check_equal = 0 - thisguy = Whitespace(token) - if not indents[-1].less(thisguy): - witness = indents[-1].not_less_witness(thisguy) - msg = "indent not greater e.g. " + format_witnesses(witness) - raise NannyNag(start[0], msg, line) - indents.append(thisguy) - - elif type == DEDENT: - # there's nothing we need to check here! what's important is - # that when the run of DEDENTs ends, the indentation of the - # program statement (or ENDMARKER) that triggered the run is - # equal to what's left at the top of the indents stack - - # Ouch! This assert triggers if the last line of the source - # is indented *and* lacks a newline -- then DEDENTs pop out - # of thin air. 
- # assert check_equal # else no earlier NEWLINE, or an earlier INDENT - check_equal = 1 - - del indents[-1] - - elif check_equal and type not in JUNK: - # this is the first "real token" following a NEWLINE, so it - # must be the first token of the next program statement, or an - # ENDMARKER; the "line" argument exposes the leading whitespace - # for this statement; in the case of ENDMARKER, line is an empty - # string, so will properly match the empty string with which the - # "indents" stack was seeded - check_equal = 0 - thisguy = Whitespace(line) - if not indents[-1].equal(thisguy): - witness = indents[-1].not_equal_witness(thisguy) - msg = "indent not equal e.g. " + format_witnesses(witness) - raise NannyNag(start[0], msg, line) - - -if __name__ == '__main__': - main() diff --git a/python/Lib/tarfile.py b/python/Lib/tarfile.py deleted file mode 100755 index 5a317649f1..0000000000 --- a/python/Lib/tarfile.py +++ /dev/null @@ -1,2630 +0,0 @@ -# -*- coding: iso-8859-1 -*- -#------------------------------------------------------------------- -# tarfile.py -#------------------------------------------------------------------- -# Copyright (C) 2002 Lars Gustäbel -# All rights reserved. -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -"""Read from and write to tar format archives. -""" - -__version__ = "$Revision: 85213 $" -# $Source$ - -version = "0.9.0" -__author__ = "Lars Gustäbel (lars@gustaebel.de)" -__date__ = "$Date$" -__cvsid__ = "$Id$" -__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend." - -#--------- -# Imports -#--------- -from __builtin__ import open as bltn_open -import sys -import os -import shutil -import stat -import errno -import time -import struct -import copy -import re -import operator - -try: - import grp, pwd -except ImportError: - grp = pwd = None - -# from tarfile import * -__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] - -#--------------------------------------------------------- -# tar constants -#--------------------------------------------------------- -NUL = "\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records -GNU_MAGIC = "ustar \0" # magic gnu tar string -POSIX_MAGIC = "ustar\x0000" # magic posix tar string - -LENGTH_NAME = 100 # maximum length of a filename -LENGTH_LINK = 100 # maximum length of a linkname -LENGTH_PREFIX = 155 # maximum length of the prefix field - -REGTYPE = "0" # regular file -AREGTYPE = "\0" # regular file -LNKTYPE = "1" # link (inside tarfile) -SYMTYPE = "2" # symbolic link -CHRTYPE = "3" # character special device -BLKTYPE = "4" # block special device -DIRTYPE = "5" # directory -FIFOTYPE = "6" # fifo special device -CONTTYPE = "7" # contiguous file - 
-GNUTYPE_LONGNAME = "L" # GNU tar longname -GNUTYPE_LONGLINK = "K" # GNU tar longlink -GNUTYPE_SPARSE = "S" # GNU tar sparse file - -XHDTYPE = "x" # POSIX.1-2001 extended header -XGLTYPE = "g" # POSIX.1-2001 global header -SOLARIS_XHDTYPE = "X" # Solaris extended header - -USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format -GNU_FORMAT = 1 # GNU tar format -PAX_FORMAT = 2 # POSIX.1-2001 (pax) format -DEFAULT_FORMAT = GNU_FORMAT - -#--------------------------------------------------------- -# tarfile constants -#--------------------------------------------------------- -# File types that tarfile supports: -SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, - SYMTYPE, DIRTYPE, FIFOTYPE, - CONTTYPE, CHRTYPE, BLKTYPE, - GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, - GNUTYPE_SPARSE) - -# File types that will be treated as a regular file. -REGULAR_TYPES = (REGTYPE, AREGTYPE, - CONTTYPE, GNUTYPE_SPARSE) - -# File types that are part of the GNU tar format. -GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, - GNUTYPE_SPARSE) - -# Fields from a pax header that override a TarInfo attribute. -PAX_FIELDS = ("path", "linkpath", "size", "mtime", - "uid", "gid", "uname", "gname") - -# Fields in a pax header that are numbers, all other fields -# are treated as strings. -PAX_NUMBER_FIELDS = { - "atime": float, - "ctime": float, - "mtime": float, - "uid": int, - "gid": int, - "size": int -} - -#--------------------------------------------------------- -# Bits used in the mode field, values in octal. 
-#--------------------------------------------------------- -S_IFLNK = 0120000 # symbolic link -S_IFREG = 0100000 # regular file -S_IFBLK = 0060000 # block device -S_IFDIR = 0040000 # directory -S_IFCHR = 0020000 # character device -S_IFIFO = 0010000 # fifo - -TSUID = 04000 # set UID on execution -TSGID = 02000 # set GID on execution -TSVTX = 01000 # reserved - -TUREAD = 0400 # read by owner -TUWRITE = 0200 # write by owner -TUEXEC = 0100 # execute/search by owner -TGREAD = 0040 # read by group -TGWRITE = 0020 # write by group -TGEXEC = 0010 # execute/search by group -TOREAD = 0004 # read by other -TOWRITE = 0002 # write by other -TOEXEC = 0001 # execute/search by other - -#--------------------------------------------------------- -# initialization -#--------------------------------------------------------- -ENCODING = sys.getfilesystemencoding() -if ENCODING is None: - ENCODING = sys.getdefaultencoding() - -#--------------------------------------------------------- -# Some useful functions -#--------------------------------------------------------- - -def stn(s, length): - """Convert a python string to a null-terminated string buffer. - """ - return s[:length] + (length - len(s)) * NUL - -def nts(s): - """Convert a null-terminated string field to a python string. - """ - # Use the string up to the first null char. - p = s.find("\0") - if p == -1: - return s - return s[:p] - -def nti(s): - """Convert a number field to a python number. - """ - # There are two possible encodings for a number field, see - # itn() below. - if s[0] != chr(0200): - try: - n = int(nts(s).strip() or "0", 8) - except ValueError: - raise InvalidHeaderError("invalid header") - else: - n = 0L - for i in xrange(len(s) - 1): - n <<= 8 - n += ord(s[i + 1]) - return n - -def itn(n, digits=8, format=DEFAULT_FORMAT): - """Convert a python number to a number field. 
- """ - # POSIX 1003.1-1988 requires numbers to be encoded as a string of - # octal digits followed by a null-byte, this allows values up to - # (8**(digits-1))-1. GNU tar allows storing numbers greater than - # that if necessary. A leading 0200 byte indicates this particular - # encoding, the following digits-1 bytes are a big-endian - # representation. This allows values up to (256**(digits-1))-1. - if 0 <= n < 8 ** (digits - 1): - s = "%0*o" % (digits - 1, n) + NUL - else: - if format != GNU_FORMAT or n >= 256 ** (digits - 1): - raise ValueError("overflow in number field") - - if n < 0: - # XXX We mimic GNU tar's behaviour with negative numbers, - # this could raise OverflowError. - n = struct.unpack("L", struct.pack("l", n))[0] - - s = "" - for i in xrange(digits - 1): - s = chr(n & 0377) + s - n >>= 8 - s = chr(0200) + s - return s - -def uts(s, encoding, errors): - """Convert a unicode object to a string. - """ - if errors == "utf-8": - # An extra error handler similar to the -o invalid=UTF-8 option - # in POSIX.1-2001. Replace untranslatable characters with their - # UTF-8 representation. - try: - return s.encode(encoding, "strict") - except UnicodeEncodeError: - x = [] - for c in s: - try: - x.append(c.encode(encoding, "strict")) - except UnicodeEncodeError: - x.append(c.encode("utf8")) - return "".join(x) - else: - return s.encode(encoding, errors) - -def calc_chksums(buf): - """Calculate the checksum for a member's header by summing up all - characters except for the chksum field which is treated as if - it was filled with spaces. According to the GNU tar sources, - some tars (Sun and NeXT) calculate chksum with signed char, - which will be different if there are chars in the buffer with - the high bit set. So we calculate two checksums, unsigned and - signed. 
- """ - unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) - signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) - return unsigned_chksum, signed_chksum - -def copyfileobj(src, dst, length=None): - """Copy length bytes from fileobj src to fileobj dst. - If length is None, copy the entire content. - """ - if length == 0: - return - if length is None: - shutil.copyfileobj(src, dst) - return - - BUFSIZE = 16 * 1024 - blocks, remainder = divmod(length, BUFSIZE) - for b in xrange(blocks): - buf = src.read(BUFSIZE) - if len(buf) < BUFSIZE: - raise IOError("end of file reached") - dst.write(buf) - - if remainder != 0: - buf = src.read(remainder) - if len(buf) < remainder: - raise IOError("end of file reached") - dst.write(buf) - return - -filemode_table = ( - ((S_IFLNK, "l"), - (S_IFREG, "-"), - (S_IFBLK, "b"), - (S_IFDIR, "d"), - (S_IFCHR, "c"), - (S_IFIFO, "p")), - - ((TUREAD, "r"),), - ((TUWRITE, "w"),), - ((TUEXEC|TSUID, "s"), - (TSUID, "S"), - (TUEXEC, "x")), - - ((TGREAD, "r"),), - ((TGWRITE, "w"),), - ((TGEXEC|TSGID, "s"), - (TSGID, "S"), - (TGEXEC, "x")), - - ((TOREAD, "r"),), - ((TOWRITE, "w"),), - ((TOEXEC|TSVTX, "t"), - (TSVTX, "T"), - (TOEXEC, "x")) -) - -def filemode(mode): - """Convert a file's mode to a string of the form - -rwxrwxrwx. 
- Used by TarFile.list() - """ - perm = [] - for table in filemode_table: - for bit, char in table: - if mode & bit == bit: - perm.append(char) - break - else: - perm.append("-") - return "".join(perm) - -class TarError(Exception): - """Base exception.""" - pass -class ExtractError(TarError): - """General exception for extract errors.""" - pass -class ReadError(TarError): - """Exception for unreadable tar archives.""" - pass -class CompressionError(TarError): - """Exception for unavailable compression methods.""" - pass -class StreamError(TarError): - """Exception for unsupported operations on stream-like TarFiles.""" - pass -class HeaderError(TarError): - """Base exception for header errors.""" - pass -class EmptyHeaderError(HeaderError): - """Exception for empty headers.""" - pass -class TruncatedHeaderError(HeaderError): - """Exception for truncated headers.""" - pass -class EOFHeaderError(HeaderError): - """Exception for end of file headers.""" - pass -class InvalidHeaderError(HeaderError): - """Exception for invalid headers.""" - pass -class SubsequentHeaderError(HeaderError): - """Exception for missing and invalid extended headers.""" - pass - -#--------------------------- -# internal stream interface -#--------------------------- -class _LowLevelFile: - """Low-level file object. Supports reading and writing. - It is used instead of a regular file object for streaming - access. - """ - - def __init__(self, name, mode): - mode = { - "r": os.O_RDONLY, - "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, - }[mode] - if hasattr(os, "O_BINARY"): - mode |= os.O_BINARY - self.fd = os.open(name, mode, 0666) - - def close(self): - os.close(self.fd) - - def read(self, size): - return os.read(self.fd, size) - - def write(self, s): - os.write(self.fd, s) - -class _Stream: - """Class that serves as an adapter between TarFile and - a stream-like object. The stream-like object only - needs to have a read() or write() method and is accessed - blockwise. 
Use of gzip or bzip2 compression is possible. - A stream-like object could be for example: sys.stdin, - sys.stdout, a socket, a tape device etc. - - _Stream is intended to be used only internally. - """ - - def __init__(self, name, mode, comptype, fileobj, bufsize): - """Construct a _Stream object. - """ - self._extfileobj = True - if fileobj is None: - fileobj = _LowLevelFile(name, mode) - self._extfileobj = False - - if comptype == '*': - # Enable transparent compression detection for the - # stream interface - fileobj = _StreamProxy(fileobj) - comptype = fileobj.getcomptype() - - self.name = name or "" - self.mode = mode - self.comptype = comptype - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = "" - self.pos = 0L - self.closed = False - - try: - if comptype == "gz": - try: - import zlib - except ImportError: - raise CompressionError("zlib module is not available") - self.zlib = zlib - self.crc = zlib.crc32("") & 0xffffffffL - if mode == "r": - self._init_read_gz() - else: - self._init_write_gz() - - elif comptype == "bz2": - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - if mode == "r": - self.dbuf = "" - self.cmp = bz2.BZ2Decompressor() - else: - self.cmp = bz2.BZ2Compressor() - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def __del__(self): - if hasattr(self, "closed") and not self.closed: - self.close() - - def _init_write_gz(self): - """Initialize for writing with gzip compression. - """ - self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, - -self.zlib.MAX_WBITS, - self.zlib.DEF_MEM_LEVEL, - 0) - timestamp = struct.pack(" self.bufsize: - self.fileobj.write(self.buf[:self.bufsize]) - self.buf = self.buf[self.bufsize:] - - def close(self): - """Close the _Stream object. No operation should be - done on it afterwards. 
- """ - if self.closed: - return - - self.closed = True - try: - if self.mode == "w" and self.comptype != "tar": - self.buf += self.cmp.flush() - - if self.mode == "w" and self.buf: - self.fileobj.write(self.buf) - self.buf = "" - if self.comptype == "gz": - # The native zlib crc is an unsigned 32-bit integer, but - # the Python wrapper implicitly casts that to a signed C - # long. So, on a 32-bit box self.crc may "look negative", - # while the same crc on a 64-bit box may "look positive". - # To avoid irksome warnings from the `struct` module, force - # it to look positive on all boxes. - self.fileobj.write(struct.pack("= 0: - blocks, remainder = divmod(pos - self.pos, self.bufsize) - for i in xrange(blocks): - self.read(self.bufsize) - self.read(remainder) - else: - raise StreamError("seeking backwards is not allowed") - return self.pos - - def read(self, size=None): - """Return the next size number of bytes from the stream. - If size is not defined, return all bytes of the stream - up to EOF. - """ - if size is None: - t = [] - while True: - buf = self._read(self.bufsize) - if not buf: - break - t.append(buf) - buf = "".join(t) - else: - buf = self._read(size) - self.pos += len(buf) - return buf - - def _read(self, size): - """Return size bytes from the stream. - """ - if self.comptype == "tar": - return self.__read(size) - - c = len(self.dbuf) - t = [self.dbuf] - while c < size: - buf = self.__read(self.bufsize) - if not buf: - break - try: - buf = self.cmp.decompress(buf) - except IOError: - raise ReadError("invalid compressed data") - t.append(buf) - c += len(buf) - t = "".join(t) - self.dbuf = t[size:] - return t[:size] - - def __read(self, size): - """Return size bytes from stream. If internal buffer is empty, - read another block from the stream. 
- """ - c = len(self.buf) - t = [self.buf] - while c < size: - buf = self.fileobj.read(self.bufsize) - if not buf: - break - t.append(buf) - c += len(buf) - t = "".join(t) - self.buf = t[size:] - return t[:size] -# class _Stream - -class _StreamProxy(object): - """Small proxy class that enables transparent compression - detection for the Stream interface (mode 'r|*'). - """ - - def __init__(self, fileobj): - self.fileobj = fileobj - self.buf = self.fileobj.read(BLOCKSIZE) - - def read(self, size): - self.read = self.fileobj.read - return self.buf - - def getcomptype(self): - if self.buf.startswith("\037\213\010"): - return "gz" - if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY": - return "bz2" - return "tar" - - def close(self): - self.fileobj.close() -# class StreamProxy - -class _BZ2Proxy(object): - """Small proxy class that enables external file object - support for "r:bz2" and "w:bz2" modes. This is actually - a workaround for a limitation in bz2 module's BZ2File - class which (unlike gzip.GzipFile) has no support for - a file object argument. 
- """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.name = getattr(self.fileobj, "name", None) - self.init() - - def init(self): - import bz2 - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = "" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - b = [self.buf] - x = len(self.buf) - while x < size: - raw = self.fileobj.read(self.blocksize) - if not raw: - break - data = self.bz2obj.decompress(raw) - b.append(data) - x += len(data) - self.buf = "".join(b) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) -# class _BZ2Proxy - -#------------------------ -# Extraction file object -#------------------------ -class _FileInFile(object): - """A thin wrapper around an existing file object that - provides a part of its data as an individual file - object. - """ - - def __init__(self, fileobj, offset, size, sparse=None): - self.fileobj = fileobj - self.offset = offset - self.size = size - self.sparse = sparse - self.position = 0 - - def tell(self): - """Return the current file position. - """ - return self.position - - def seek(self, position): - """Seek to a position in the file. - """ - self.position = position - - def read(self, size=None): - """Read data from the file. 
- """ - if size is None: - size = self.size - self.position - else: - size = min(size, self.size - self.position) - - if self.sparse is None: - return self.readnormal(size) - else: - return self.readsparse(size) - - def __read(self, size): - buf = self.fileobj.read(size) - if len(buf) != size: - raise ReadError("unexpected end of data") - return buf - - def readnormal(self, size): - """Read operation for regular files. - """ - self.fileobj.seek(self.offset + self.position) - self.position += size - return self.__read(size) - - def readsparse(self, size): - """Read operation for sparse files. - """ - data = [] - while size > 0: - buf = self.readsparsesection(size) - if not buf: - break - size -= len(buf) - data.append(buf) - return "".join(data) - - def readsparsesection(self, size): - """Read a single section of a sparse file. - """ - section = self.sparse.find(self.position) - - if section is None: - return "" - - size = min(size, section.offset + section.size - self.position) - - if isinstance(section, _data): - realpos = section.realpos + self.position - section.offset - self.fileobj.seek(self.offset + realpos) - self.position += size - return self.__read(size) - else: - self.position += size - return NUL * size -#class _FileInFile - - -class ExFileObject(object): - """File-like object for reading an archive member. - Is returned by TarFile.extractfile(). - """ - blocksize = 1024 - - def __init__(self, tarfile, tarinfo): - self.fileobj = _FileInFile(tarfile.fileobj, - tarinfo.offset_data, - tarinfo.size, - getattr(tarinfo, "sparse", None)) - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.size = tarinfo.size - - self.position = 0 - self.buffer = "" - - def read(self, size=None): - """Read at most size bytes from the file. If size is not - present or None, read all data until EOF is reached. 
- """ - if self.closed: - raise ValueError("I/O operation on closed file") - - buf = "" - if self.buffer: - if size is None: - buf = self.buffer - self.buffer = "" - else: - buf = self.buffer[:size] - self.buffer = self.buffer[size:] - - if size is None: - buf += self.fileobj.read() - else: - buf += self.fileobj.read(size - len(buf)) - - self.position += len(buf) - return buf - - def readline(self, size=-1): - """Read one entire line from the file. If size is present - and non-negative, return a string with at most that - size, which may be an incomplete line. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if "\n" in self.buffer: - pos = self.buffer.find("\n") + 1 - else: - buffers = [self.buffer] - while True: - buf = self.fileobj.read(self.blocksize) - buffers.append(buf) - if not buf or "\n" in buf: - self.buffer = "".join(buffers) - pos = self.buffer.find("\n") + 1 - if pos == 0: - # no newline found. - pos = len(self.buffer) - break - - if size != -1: - pos = min(size, pos) - - buf = self.buffer[:pos] - self.buffer = self.buffer[pos:] - self.position += len(buf) - return buf - - def readlines(self): - """Return a list with all remaining lines. - """ - result = [] - while True: - line = self.readline() - if not line: break - result.append(line) - return result - - def tell(self): - """Return the current file position. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - return self.position - - def seek(self, pos, whence=os.SEEK_SET): - """Seek to a position in the file. 
- """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if whence == os.SEEK_SET: - self.position = min(max(pos, 0), self.size) - elif whence == os.SEEK_CUR: - if pos < 0: - self.position = max(self.position + pos, 0) - else: - self.position = min(self.position + pos, self.size) - elif whence == os.SEEK_END: - self.position = max(min(self.size + pos, self.size), 0) - else: - raise ValueError("Invalid argument") - - self.buffer = "" - self.fileobj.seek(self.position) - - def close(self): - """Close the file object. - """ - self.closed = True - - def __iter__(self): - """Get an iterator over the file's lines. - """ - while True: - line = self.readline() - if not line: - break - yield line -#class ExFileObject - -#------------------ -# Exported Classes -#------------------ -class TarInfo(object): - """Informational class which holds the details about an - archive member given by a tar header block. - TarInfo objects are returned by TarFile.getmember(), - TarFile.getmembers() and TarFile.gettarinfo() and are - usually created internally. - """ - - def __init__(self, name=""): - """Construct a TarInfo object. name is the optional name - of the member. - """ - self.name = name # member name - self.mode = 0644 # file permissions - self.uid = 0 # user id - self.gid = 0 # group id - self.size = 0 # file size - self.mtime = 0 # modification time - self.chksum = 0 # header checksum - self.type = REGTYPE # member type - self.linkname = "" # link name - self.uname = "" # user name - self.gname = "" # group name - self.devmajor = 0 # device major number - self.devminor = 0 # device minor number - - self.offset = 0 # the tar header starts here - self.offset_data = 0 # the file's data starts here - - self.pax_headers = {} # pax header information - - # In pax headers the "name" and "linkname" field are called - # "path" and "linkpath". 
- def _getpath(self): - return self.name - def _setpath(self, name): - self.name = name - path = property(_getpath, _setpath) - - def _getlinkpath(self): - return self.linkname - def _setlinkpath(self, linkname): - self.linkname = linkname - linkpath = property(_getlinkpath, _setlinkpath) - - def __repr__(self): - return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) - - def get_info(self, encoding, errors): - """Return the TarInfo's attributes as a dictionary. - """ - info = { - "name": self.name, - "mode": self.mode & 07777, - "uid": self.uid, - "gid": self.gid, - "size": self.size, - "mtime": self.mtime, - "chksum": self.chksum, - "type": self.type, - "linkname": self.linkname, - "uname": self.uname, - "gname": self.gname, - "devmajor": self.devmajor, - "devminor": self.devminor - } - - if info["type"] == DIRTYPE and not info["name"].endswith("/"): - info["name"] += "/" - - for key in ("name", "linkname", "uname", "gname"): - if type(info[key]) is unicode: - info[key] = info[key].encode(encoding, errors) - - return info - - def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): - """Return a tar header as a string of 512 byte blocks. - """ - info = self.get_info(encoding, errors) - - if format == USTAR_FORMAT: - return self.create_ustar_header(info) - elif format == GNU_FORMAT: - return self.create_gnu_header(info) - elif format == PAX_FORMAT: - return self.create_pax_header(info, encoding, errors) - else: - raise ValueError("invalid format") - - def create_ustar_header(self, info): - """Return the object as a ustar header block. - """ - info["magic"] = POSIX_MAGIC - - if len(info["linkname"]) > LENGTH_LINK: - raise ValueError("linkname is too long") - - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) - - return self._create_header(info, USTAR_FORMAT) - - def create_gnu_header(self, info): - """Return the object as a GNU header block sequence. 
- """ - info["magic"] = GNU_MAGIC - - buf = "" - if len(info["linkname"]) > LENGTH_LINK: - buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) - - if len(info["name"]) > LENGTH_NAME: - buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) - - return buf + self._create_header(info, GNU_FORMAT) - - def create_pax_header(self, info, encoding, errors): - """Return the object as a ustar header block. If it cannot be - represented this way, prepend a pax extended header sequence - with supplement information. - """ - info["magic"] = POSIX_MAGIC - pax_headers = self.pax_headers.copy() - - # Test string fields for values that exceed the field length or cannot - # be represented in ASCII encoding. - for name, hname, length in ( - ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), - ("uname", "uname", 32), ("gname", "gname", 32)): - - if hname in pax_headers: - # The pax header has priority. - continue - - val = info[name].decode(encoding, errors) - - # Try to encode the string as ASCII. - try: - val.encode("ascii") - except UnicodeEncodeError: - pax_headers[hname] = val - continue - - if len(info[name]) > length: - pax_headers[hname] = val - - # Test number fields for values that exceed the field limit or values - # that like to be stored as float. - for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): - if name in pax_headers: - # The pax header has priority. Avoid overflow. - info[name] = 0 - continue - - val = info[name] - if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): - pax_headers[name] = unicode(val) - info[name] = 0 - - # Create a pax extended header if necessary. - if pax_headers: - buf = self._create_pax_generic_header(pax_headers) - else: - buf = "" - - return buf + self._create_header(info, USTAR_FORMAT) - - @classmethod - def create_pax_global_header(cls, pax_headers): - """Return the object as a pax global header block sequence. 
- """ - return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) - - def _posix_split_name(self, name): - """Split a name longer than 100 chars into a prefix - and a name part. - """ - prefix = name[:LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] - - name = name[len(prefix):] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: - raise ValueError("name is too long") - return prefix, name - - @staticmethod - def _create_header(info, format): - """Return a header block. info is a dictionary with file - information, format must be one of the *_FORMAT constants. - """ - parts = [ - stn(info.get("name", ""), 100), - itn(info.get("mode", 0) & 07777, 8, format), - itn(info.get("uid", 0), 8, format), - itn(info.get("gid", 0), 8, format), - itn(info.get("size", 0), 12, format), - itn(info.get("mtime", 0), 12, format), - " ", # checksum field - info.get("type", REGTYPE), - stn(info.get("linkname", ""), 100), - stn(info.get("magic", POSIX_MAGIC), 8), - stn(info.get("uname", ""), 32), - stn(info.get("gname", ""), 32), - itn(info.get("devmajor", 0), 8, format), - itn(info.get("devminor", 0), 8, format), - stn(info.get("prefix", ""), 155) - ] - - buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) - chksum = calc_chksums(buf[-BLOCKSIZE:])[0] - buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] - return buf - - @staticmethod - def _create_payload(payload): - """Return the string payload filled with zero bytes - up to the next 512 byte border. - """ - blocks, remainder = divmod(len(payload), BLOCKSIZE) - if remainder > 0: - payload += (BLOCKSIZE - remainder) * NUL - return payload - - @classmethod - def _create_gnu_long_header(cls, name, type): - """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence - for name. - """ - name += NUL - - info = {} - info["name"] = "././@LongLink" - info["type"] = type - info["size"] = len(name) - info["magic"] = GNU_MAGIC - - # create extended header + name blocks. 
- return cls._create_header(info, USTAR_FORMAT) + \ - cls._create_payload(name) - - @classmethod - def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): - """Return a POSIX.1-2001 extended or global header sequence - that contains a list of keyword, value pairs. The values - must be unicode objects. - """ - records = [] - for keyword, value in pax_headers.iteritems(): - keyword = keyword.encode("utf8") - value = value.encode("utf8") - l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' - n = p = 0 - while True: - n = l + len(str(p)) - if n == p: - break - p = n - records.append("%d %s=%s\n" % (p, keyword, value)) - records = "".join(records) - - # We use a hardcoded "././@PaxHeader" name like star does - # instead of the one that POSIX recommends. - info = {} - info["name"] = "././@PaxHeader" - info["type"] = type - info["size"] = len(records) - info["magic"] = POSIX_MAGIC - - # Create pax header + record blocks. - return cls._create_header(info, USTAR_FORMAT) + \ - cls._create_payload(records) - - @classmethod - def frombuf(cls, buf): - """Construct a TarInfo object from a 512 byte string buffer. 
- """ - if len(buf) == 0: - raise EmptyHeaderError("empty header") - if len(buf) != BLOCKSIZE: - raise TruncatedHeaderError("truncated header") - if buf.count(NUL) == BLOCKSIZE: - raise EOFHeaderError("end of file header") - - chksum = nti(buf[148:156]) - if chksum not in calc_chksums(buf): - raise InvalidHeaderError("bad checksum") - - obj = cls() - obj.buf = buf - obj.name = nts(buf[0:100]) - obj.mode = nti(buf[100:108]) - obj.uid = nti(buf[108:116]) - obj.gid = nti(buf[116:124]) - obj.size = nti(buf[124:136]) - obj.mtime = nti(buf[136:148]) - obj.chksum = chksum - obj.type = buf[156:157] - obj.linkname = nts(buf[157:257]) - obj.uname = nts(buf[265:297]) - obj.gname = nts(buf[297:329]) - obj.devmajor = nti(buf[329:337]) - obj.devminor = nti(buf[337:345]) - prefix = nts(buf[345:500]) - - # Old V7 tar format represents a directory as a regular - # file with a trailing slash. - if obj.type == AREGTYPE and obj.name.endswith("/"): - obj.type = DIRTYPE - - # Remove redundant slashes from directories. - if obj.isdir(): - obj.name = obj.name.rstrip("/") - - # Reconstruct a ustar longname. - if prefix and obj.type not in GNU_TYPES: - obj.name = prefix + "/" + obj.name - return obj - - @classmethod - def fromtarfile(cls, tarfile): - """Return the next TarInfo object from TarFile object - tarfile. - """ - buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf) - obj.offset = tarfile.fileobj.tell() - BLOCKSIZE - return obj._proc_member(tarfile) - - #-------------------------------------------------------------------------- - # The following are methods that are called depending on the type of a - # member. The entry point is _proc_member() which can be overridden in a - # subclass to add custom _proc_*() methods. A _proc_*() method MUST - # implement the following - # operations: - # 1. Set self.offset_data to the position where the data blocks begin, - # if there is data that follows. - # 2. 
Set tarfile.offset to the position where the next member's header will - # begin. - # 3. Return self or another valid TarInfo object. - def _proc_member(self, tarfile): - """Choose the right processing method depending on - the type and call it. - """ - if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): - return self._proc_gnulong(tarfile) - elif self.type == GNUTYPE_SPARSE: - return self._proc_sparse(tarfile) - elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): - return self._proc_pax(tarfile) - else: - return self._proc_builtin(tarfile) - - def _proc_builtin(self, tarfile): - """Process a builtin type or an unknown type which - will be treated as a regular file. - """ - self.offset_data = tarfile.fileobj.tell() - offset = self.offset_data - if self.isreg() or self.type not in SUPPORTED_TYPES: - # Skip the following data blocks. - offset += self._block(self.size) - tarfile.offset = offset - - # Patch the TarInfo object with saved global - # header information. - self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) - - return self - - def _proc_gnulong(self, tarfile): - """Process the blocks that hold a GNU longname - or longlink member. - """ - buf = tarfile.fileobj.read(self._block(self.size)) - - # Fetch the next header and process it. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - # Patch the TarInfo object from the next header with - # the longname information. - next.offset = self.offset - if self.type == GNUTYPE_LONGNAME: - next.name = nts(buf) - elif self.type == GNUTYPE_LONGLINK: - next.linkname = nts(buf) - - return next - - def _proc_sparse(self, tarfile): - """Process a GNU sparse header plus extra headers. - """ - buf = self.buf - sp = _ringbuffer() - pos = 386 - lastpos = 0L - realpos = 0L - # There are 4 possible sparse structs in the - # first header. 
- for i in xrange(4): - try: - offset = nti(buf[pos:pos + 12]) - numbytes = nti(buf[pos + 12:pos + 24]) - except ValueError: - break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes - pos += 24 - - isextended = ord(buf[482]) - origsize = nti(buf[483:495]) - - # If the isextended flag is given, - # there are extra headers to process. - while isextended == 1: - buf = tarfile.fileobj.read(BLOCKSIZE) - pos = 0 - for i in xrange(21): - try: - offset = nti(buf[pos:pos + 12]) - numbytes = nti(buf[pos + 12:pos + 24]) - except ValueError: - break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes - pos += 24 - isextended = ord(buf[504]) - - if lastpos < origsize: - sp.append(_hole(lastpos, origsize - lastpos)) - - self.sparse = sp - - self.offset_data = tarfile.fileobj.tell() - tarfile.offset = self.offset_data + self._block(self.size) - self.size = origsize - - return self - - def _proc_pax(self, tarfile): - """Process an extended or global header as described in - POSIX.1-2001. - """ - # Read the header information. - buf = tarfile.fileobj.read(self._block(self.size)) - - # A pax header stores supplemental information for either - # the following file (extended) or all following files - # (global). - if self.type == XGLTYPE: - pax_headers = tarfile.pax_headers - else: - pax_headers = tarfile.pax_headers.copy() - - # Parse pax header information. A record looks like that: - # "%d %s=%s\n" % (length, keyword, value). length is the size - # of the complete record including the length field itself and - # the newline. keyword and value are both UTF-8 encoded strings. 
- regex = re.compile(r"(\d+) ([^=]+)=", re.U) - pos = 0 - while True: - match = regex.match(buf, pos) - if not match: - break - - length, keyword = match.groups() - length = int(length) - value = buf[match.end(2) + 1:match.start(1) + length - 1] - - keyword = keyword.decode("utf8") - value = value.decode("utf8") - - pax_headers[keyword] = value - pos += length - - # Fetch the next header. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - if self.type in (XHDTYPE, SOLARIS_XHDTYPE): - # Patch the TarInfo object with the extended header info. - next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) - next.offset = self.offset - - if "size" in pax_headers: - # If the extended header replaces the size field, - # we need to recalculate the offset where the next - # header starts. - offset = next.offset_data - if next.isreg() or next.type not in SUPPORTED_TYPES: - offset += next._block(next.size) - tarfile.offset = offset - - return next - - def _apply_pax_info(self, pax_headers, encoding, errors): - """Replace fields with supplemental information from a previous - pax extended or global header. - """ - for keyword, value in pax_headers.iteritems(): - if keyword not in PAX_FIELDS: - continue - - if keyword == "path": - value = value.rstrip("/") - - if keyword in PAX_NUMBER_FIELDS: - try: - value = PAX_NUMBER_FIELDS[keyword](value) - except ValueError: - value = 0 - else: - value = uts(value, encoding, errors) - - setattr(self, keyword, value) - - self.pax_headers = pax_headers.copy() - - def _block(self, count): - """Round up a byte count by BLOCKSIZE and return it, - e.g. _block(834) => 1024. 
- """ - blocks, remainder = divmod(count, BLOCKSIZE) - if remainder: - blocks += 1 - return blocks * BLOCKSIZE - - def isreg(self): - return self.type in REGULAR_TYPES - def isfile(self): - return self.isreg() - def isdir(self): - return self.type == DIRTYPE - def issym(self): - return self.type == SYMTYPE - def islnk(self): - return self.type == LNKTYPE - def ischr(self): - return self.type == CHRTYPE - def isblk(self): - return self.type == BLKTYPE - def isfifo(self): - return self.type == FIFOTYPE - def issparse(self): - return self.type == GNUTYPE_SPARSE - def isdev(self): - return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) -# class TarInfo - -class TarFile(object): - """The TarFile Class provides an interface to tar archives. - """ - - debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) - - dereference = False # If true, add content of linked file to the - # tar file, else the link. - - ignore_zeros = False # If true, skips empty or invalid blocks and - # continues processing. - - errorlevel = 1 # If 0, fatal errors only appear in debug - # messages (if debug >= 0). If > 0, errors - # are passed to the caller as exceptions. - - format = DEFAULT_FORMAT # The format to use when creating an archive. - - encoding = ENCODING # Encoding for 8-bit character strings. - - errors = None # Error handler for unicode conversion. - - tarinfo = TarInfo # The default TarInfo class to use. - - fileobject = ExFileObject # The default ExFileObject class to use. - - def __init__(self, name=None, mode="r", fileobj=None, format=None, - tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, - errors=None, pax_headers=None, debug=None, errorlevel=None): - """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to - read from an existing archive, 'a' to append data to an existing - file or 'w' to create a new file overwriting an existing one. `mode' - defaults to 'r'. - If `fileobj' is given, it is used for reading or writing data. 
If it - can be determined, `mode' is overridden by `fileobj's mode. - `fileobj' is not closed, when TarFile is closed. - """ - modes = {"r": "rb", "a": "r+b", "w": "wb"} - if mode not in modes: - raise ValueError("mode must be 'r', 'a' or 'w'") - self.mode = mode - self._mode = modes[mode] - - if not fileobj: - if self.mode == "a" and not os.path.exists(name): - # Create nonexistent files in append mode. - self.mode = "w" - self._mode = "wb" - fileobj = bltn_open(name, self._mode) - self._extfileobj = False - else: - if name is None and hasattr(fileobj, "name"): - name = fileobj.name - if hasattr(fileobj, "mode"): - self._mode = fileobj.mode - self._extfileobj = True - self.name = os.path.abspath(name) if name else None - self.fileobj = fileobj - - # Init attributes. - if format is not None: - self.format = format - if tarinfo is not None: - self.tarinfo = tarinfo - if dereference is not None: - self.dereference = dereference - if ignore_zeros is not None: - self.ignore_zeros = ignore_zeros - if encoding is not None: - self.encoding = encoding - - if errors is not None: - self.errors = errors - elif mode == "r": - self.errors = "utf-8" - else: - self.errors = "strict" - - if pax_headers is not None and self.format == PAX_FORMAT: - self.pax_headers = pax_headers - else: - self.pax_headers = {} - - if debug is not None: - self.debug = debug - if errorlevel is not None: - self.errorlevel = errorlevel - - # Init datastructures. - self.closed = False - self.members = [] # list of members as TarInfo objects - self._loaded = False # flag if all members have been read - self.offset = self.fileobj.tell() - # current position in the archive file - self.inodes = {} # dictionary caching the inodes of - # archive members already added - - try: - if self.mode == "r": - self.firstmember = None - self.firstmember = self.next() - - if self.mode == "a": - # Move to the end of the archive, - # before the first empty block. 
- while True: - self.fileobj.seek(self.offset) - try: - tarinfo = self.tarinfo.fromtarfile(self) - self.members.append(tarinfo) - except EOFHeaderError: - self.fileobj.seek(self.offset) - break - except HeaderError, e: - raise ReadError(str(e)) - - if self.mode in "aw": - self._loaded = True - - if self.pax_headers: - buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) - self.fileobj.write(buf) - self.offset += len(buf) - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def _getposix(self): - return self.format == USTAR_FORMAT - def _setposix(self, value): - import warnings - warnings.warn("use the format attribute instead", DeprecationWarning, - 2) - if value: - self.format = USTAR_FORMAT - else: - self.format = GNU_FORMAT - posix = property(_getposix, _setposix) - - #-------------------------------------------------------------------------- - # Below are the classmethods which act as alternate constructors to the - # TarFile class. The open() method is the only one that is needed for - # public use; it is the "super"-constructor and is able to select an - # adequate "sub"-constructor for a particular compression using the mapping - # from OPEN_METH. - # - # This concept allows one to subclass TarFile without losing the comfort of - # the super-constructor. A sub-constructor is registered and made available - # by adding it to the mapping in OPEN_METH. - - @classmethod - def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): - """Open a tar archive for reading, writing or appending. Return - an appropriate TarFile class. 
- - mode: - 'r' or 'r:*' open for reading with transparent compression - 'r:' open for reading exclusively uncompressed - 'r:gz' open for reading with gzip compression - 'r:bz2' open for reading with bzip2 compression - 'a' or 'a:' open for appending, creating the file if necessary - 'w' or 'w:' open for writing without compression - 'w:gz' open for writing with gzip compression - 'w:bz2' open for writing with bzip2 compression - - 'r|*' open a stream of tar blocks with transparent compression - 'r|' open an uncompressed stream of tar blocks for reading - 'r|gz' open a gzip compressed stream of tar blocks - 'r|bz2' open a bzip2 compressed stream of tar blocks - 'w|' open an uncompressed stream for writing - 'w|gz' open a gzip compressed stream for writing - 'w|bz2' open a bzip2 compressed stream for writing - """ - - if not name and not fileobj: - raise ValueError("nothing to open") - - if mode in ("r", "r:*"): - # Find out which *open() is appropriate for opening the file. - def not_compressed(comptype): - return cls.OPEN_METH[comptype] == 'taropen' - for comptype in sorted(cls.OPEN_METH, key=not_compressed): - func = getattr(cls, cls.OPEN_METH[comptype]) - if fileobj is not None: - saved_pos = fileobj.tell() - try: - return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError), e: - if fileobj is not None: - fileobj.seek(saved_pos) - continue - raise ReadError("file could not be opened successfully") - - elif ":" in mode: - filemode, comptype = mode.split(":", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - # Select the *open() function according to - # given compression. 
- if comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - else: - raise CompressionError("unknown compression type %r" % comptype) - return func(name, filemode, fileobj, **kwargs) - - elif "|" in mode: - filemode, comptype = mode.split("|", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - if filemode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'") - - stream = _Stream(name, filemode, comptype, fileobj, bufsize) - try: - t = cls(name, filemode, stream, **kwargs) - except: - stream.close() - raise - t._extfileobj = False - return t - - elif mode in ("a", "w"): - return cls.taropen(name, mode, fileobj, **kwargs) - - raise ValueError("undiscernible mode") - - @classmethod - def taropen(cls, name, mode="r", fileobj=None, **kwargs): - """Open uncompressed tar archive name for reading or writing. - """ - if mode not in ("r", "a", "w"): - raise ValueError("mode must be 'r', 'a' or 'w'") - return cls(name, mode, fileobj, **kwargs) - - @classmethod - def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """Open gzip compressed tar archive name for reading or writing. - Appending is not allowed. - """ - if mode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'") - - try: - import gzip - gzip.GzipFile - except (ImportError, AttributeError): - raise CompressionError("gzip module is not available") - - try: - fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj) - except OSError: - if fileobj is not None and mode == 'r': - raise ReadError("not a gzip file") - raise - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except IOError: - fileobj.close() - if mode == 'r': - raise ReadError("not a gzip file") - raise - except: - fileobj.close() - raise - t._extfileobj = False - return t - - @classmethod - def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """Open bzip2 compressed tar archive name for reading or writing. - Appending is not allowed. 
- """ - if mode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'.") - - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except (IOError, EOFError): - fileobj.close() - if mode == 'r': - raise ReadError("not a bzip2 file") - raise - except: - fileobj.close() - raise - t._extfileobj = False - return t - - # All *open() methods are registered here. - OPEN_METH = { - "tar": "taropen", # uncompressed tar - "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open" # bzip2 compressed tar - } - - #-------------------------------------------------------------------------- - # The public methods which TarFile provides: - - def close(self): - """Close the TarFile. In write-mode, two finishing zero blocks are - appended to the archive. - """ - if self.closed: - return - - self.closed = True - try: - if self.mode in "aw": - self.fileobj.write(NUL * (BLOCKSIZE * 2)) - self.offset += (BLOCKSIZE * 2) - # fill up the end with zero-blocks - # (like option -b20 for tar does) - blocks, remainder = divmod(self.offset, RECORDSIZE) - if remainder > 0: - self.fileobj.write(NUL * (RECORDSIZE - remainder)) - finally: - if not self._extfileobj: - self.fileobj.close() - - def getmember(self, name): - """Return a TarInfo object for member `name'. If `name' can not be - found in the archive, KeyError is raised. If a member occurs more - than once in the archive, its last occurrence is assumed to be the - most up-to-date version. - """ - tarinfo = self._getmember(name) - if tarinfo is None: - raise KeyError("filename %r not found" % name) - return tarinfo - - def getmembers(self): - """Return the members of the archive as a list of TarInfo objects. The - list has the same order as the members in the archive. 
- """ - self._check() - if not self._loaded: # if we want to obtain a list of - self._load() # all members, we first have to - # scan the whole archive. - return self.members - - def getnames(self): - """Return the members of the archive as a list of their names. It has - the same order as the list returned by getmembers(). - """ - return [tarinfo.name for tarinfo in self.getmembers()] - - def gettarinfo(self, name=None, arcname=None, fileobj=None): - """Create a TarInfo object from the result of os.stat or equivalent - on an existing file. The file is either named by `name', or - specified as a file object `fileobj' with a file descriptor. If - given, `arcname' specifies an alternative name for the file in the - archive, otherwise, the name is taken from the 'name' attribute of - 'fileobj', or the 'name' argument. - """ - self._check("aw") - - # When fileobj is given, replace name by - # fileobj's real name. - if fileobj is not None: - name = fileobj.name - - # Building the name of the member in the archive. - # Backward slashes are converted to forward slashes, - # Absolute paths are turned to relative paths. - if arcname is None: - arcname = name - drv, arcname = os.path.splitdrive(arcname) - arcname = arcname.replace(os.sep, "/") - arcname = arcname.lstrip("/") - - # Now, fill the TarInfo object with - # information specific for the file. - tarinfo = self.tarinfo() - tarinfo.tarfile = self # Not needed - - # Use os.stat or os.lstat, depending on platform - # and if symlinks shall be resolved. - if fileobj is None: - if hasattr(os, "lstat") and not self.dereference: - statres = os.lstat(name) - else: - statres = os.stat(name) - else: - statres = os.fstat(fileobj.fileno()) - linkname = "" - - stmd = statres.st_mode - if stat.S_ISREG(stmd): - inode = (statres.st_ino, statres.st_dev) - if not self.dereference and statres.st_nlink > 1 and \ - inode in self.inodes and arcname != self.inodes[inode]: - # Is it a hardlink to an already - # archived file? 
- type = LNKTYPE - linkname = self.inodes[inode] - else: - # The inode is added only if its valid. - # For win32 it is always 0. - type = REGTYPE - if inode[0]: - self.inodes[inode] = arcname - elif stat.S_ISDIR(stmd): - type = DIRTYPE - elif stat.S_ISFIFO(stmd): - type = FIFOTYPE - elif stat.S_ISLNK(stmd): - type = SYMTYPE - linkname = os.readlink(name) - elif stat.S_ISCHR(stmd): - type = CHRTYPE - elif stat.S_ISBLK(stmd): - type = BLKTYPE - else: - return None - - # Fill the TarInfo object with all - # information we can get. - tarinfo.name = arcname - tarinfo.mode = stmd - tarinfo.uid = statres.st_uid - tarinfo.gid = statres.st_gid - if type == REGTYPE: - tarinfo.size = statres.st_size - else: - tarinfo.size = 0L - tarinfo.mtime = statres.st_mtime - tarinfo.type = type - tarinfo.linkname = linkname - if pwd: - try: - tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] - except KeyError: - pass - if grp: - try: - tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] - except KeyError: - pass - - if type in (CHRTYPE, BLKTYPE): - if hasattr(os, "major") and hasattr(os, "minor"): - tarinfo.devmajor = os.major(statres.st_rdev) - tarinfo.devminor = os.minor(statres.st_rdev) - return tarinfo - - def list(self, verbose=True): - """Print a table of contents to sys.stdout. If `verbose' is False, only - the names of the members are printed. If it is True, an `ls -l'-like - output is produced. 
- """ - self._check() - - for tarinfo in self: - if verbose: - print filemode(tarinfo.mode), - print "%s/%s" % (tarinfo.uname or tarinfo.uid, - tarinfo.gname or tarinfo.gid), - if tarinfo.ischr() or tarinfo.isblk(): - print "%10s" % ("%d,%d" \ - % (tarinfo.devmajor, tarinfo.devminor)), - else: - print "%10d" % tarinfo.size, - print "%d-%02d-%02d %02d:%02d:%02d" \ - % time.localtime(tarinfo.mtime)[:6], - - print tarinfo.name + ("/" if tarinfo.isdir() else ""), - - if verbose: - if tarinfo.issym(): - print "->", tarinfo.linkname, - if tarinfo.islnk(): - print "link to", tarinfo.linkname, - print - - def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): - """Add the file `name' to the archive. `name' may be any type of file - (directory, fifo, symbolic link, etc.). If given, `arcname' - specifies an alternative name for the file in the archive. - Directories are added recursively by default. This can be avoided by - setting `recursive' to False. `exclude' is a function that should - return True for each filename to be excluded. `filter' is a function - that expects a TarInfo object argument and returns the changed - TarInfo object, if it returns None the TarInfo object will be - excluded from the archive. - """ - self._check("aw") - - if arcname is None: - arcname = name - - # Exclude pathnames. - if exclude is not None: - import warnings - warnings.warn("use the filter argument instead", - DeprecationWarning, 2) - if exclude(name): - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Skip if somebody tries to archive the archive... - if self.name is not None and os.path.abspath(name) == self.name: - self._dbg(2, "tarfile: Skipped %r" % name) - return - - self._dbg(1, name) - - # Create a TarInfo object from the file. - tarinfo = self.gettarinfo(name, arcname) - - if tarinfo is None: - self._dbg(1, "tarfile: Unsupported type %r" % name) - return - - # Change or exclude the TarInfo object. 
- if filter is not None: - tarinfo = filter(tarinfo) - if tarinfo is None: - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Append the tar header and data to the archive. - if tarinfo.isreg(): - with bltn_open(name, "rb") as f: - self.addfile(tarinfo, f) - - elif tarinfo.isdir(): - self.addfile(tarinfo) - if recursive: - for f in os.listdir(name): - self.add(os.path.join(name, f), os.path.join(arcname, f), - recursive, exclude, filter) - - else: - self.addfile(tarinfo) - - def addfile(self, tarinfo, fileobj=None): - """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is - given, tarinfo.size bytes are read from it and added to the archive. - You can create TarInfo objects directly, or by using gettarinfo(). - On Windows platforms, `fileobj' should always be opened with mode - 'rb' to avoid irritation about the file size. - """ - self._check("aw") - - tarinfo = copy.copy(tarinfo) - - buf = tarinfo.tobuf(self.format, self.encoding, self.errors) - self.fileobj.write(buf) - self.offset += len(buf) - - # If there's data to follow, append it. - if fileobj is not None: - copyfileobj(fileobj, self.fileobj, tarinfo.size) - blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) - if remainder > 0: - self.fileobj.write(NUL * (BLOCKSIZE - remainder)) - blocks += 1 - self.offset += blocks * BLOCKSIZE - - self.members.append(tarinfo) - - def extractall(self, path=".", members=None): - """Extract all members from the archive to the current working - directory and set owner, modification time and permissions on - directories afterwards. `path' specifies a different directory - to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). - """ - directories = [] - - if members is None: - members = self - - for tarinfo in members: - if tarinfo.isdir(): - # Extract directories with a safe mode. 
- directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 0700 - self.extract(tarinfo, path) - - # Reverse sort directories. - directories.sort(key=operator.attrgetter('name')) - directories.reverse() - - # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) - try: - self.chown(tarinfo, dirpath) - self.utime(tarinfo, dirpath) - self.chmod(tarinfo, dirpath) - except ExtractError, e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extract(self, member, path=""): - """Extract a member from the archive to the current working directory, - using its full name. Its file information is extracted as accurately - as possible. `member' may be a filename or a TarInfo object. You can - specify a different directory using `path'. - """ - self._check("r") - - if isinstance(member, basestring): - tarinfo = self.getmember(member) - else: - tarinfo = member - - # Prepare the link target for makelink(). - if tarinfo.islnk(): - tarinfo._link_target = os.path.join(path, tarinfo.linkname) - - try: - self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) - except EnvironmentError, e: - if self.errorlevel > 0: - raise - else: - if e.filename is None: - self._dbg(1, "tarfile: %s" % e.strerror) - else: - self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) - except ExtractError, e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extractfile(self, member): - """Extract a member from the archive as a file object. `member' may be - a filename or a TarInfo object. If `member' is a regular file, a - file-like object is returned. If `member' is a link, a file-like - object is constructed from the link's target. If `member' is none of - the above, None is returned. 
- The file-like object is read-only and provides the following - methods: read(), readline(), readlines(), seek() and tell() - """ - self._check("r") - - if isinstance(member, basestring): - tarinfo = self.getmember(member) - else: - tarinfo = member - - if tarinfo.isreg(): - return self.fileobject(self, tarinfo) - - elif tarinfo.type not in SUPPORTED_TYPES: - # If a member's type is unknown, it is treated as a - # regular file. - return self.fileobject(self, tarinfo) - - elif tarinfo.islnk() or tarinfo.issym(): - if isinstance(self.fileobj, _Stream): - # A small but ugly workaround for the case that someone tries - # to extract a (sym)link as a file-object from a non-seekable - # stream of tar blocks. - raise StreamError("cannot extract (sym)link as file object") - else: - # A (sym)link's file object is its target's file object. - return self.extractfile(self._find_link_target(tarinfo)) - else: - # If there's no data associated with the member (directory, chrdev, - # blkdev, etc.), return None instead of a file object. - return None - - def _extract_member(self, tarinfo, targetpath): - """Extract the TarInfo object tarinfo to a physical - file called targetpath. - """ - # Fetch the TarInfo object for the given name - # and build the destination pathname, replacing - # forward slashes to platform specific separators. - targetpath = targetpath.rstrip("/") - targetpath = targetpath.replace("/", os.sep) - - # Create all upper directories. - upperdirs = os.path.dirname(targetpath) - if upperdirs and not os.path.exists(upperdirs): - # Create directories that are not part of the archive with - # default permissions. 
- os.makedirs(upperdirs) - - if tarinfo.islnk() or tarinfo.issym(): - self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) - else: - self._dbg(1, tarinfo.name) - - if tarinfo.isreg(): - self.makefile(tarinfo, targetpath) - elif tarinfo.isdir(): - self.makedir(tarinfo, targetpath) - elif tarinfo.isfifo(): - self.makefifo(tarinfo, targetpath) - elif tarinfo.ischr() or tarinfo.isblk(): - self.makedev(tarinfo, targetpath) - elif tarinfo.islnk() or tarinfo.issym(): - self.makelink(tarinfo, targetpath) - elif tarinfo.type not in SUPPORTED_TYPES: - self.makeunknown(tarinfo, targetpath) - else: - self.makefile(tarinfo, targetpath) - - self.chown(tarinfo, targetpath) - if not tarinfo.issym(): - self.chmod(tarinfo, targetpath) - self.utime(tarinfo, targetpath) - - #-------------------------------------------------------------------------- - # Below are the different file methods. They are called via - # _extract_member() when extract() is called. They can be replaced in a - # subclass to implement other functionality. - - def makedir(self, tarinfo, targetpath): - """Make a directory called targetpath. - """ - try: - # Use a safe mode for the directory, the real mode is set - # later in _extract_member(). - os.mkdir(targetpath, 0700) - except EnvironmentError, e: - if e.errno != errno.EEXIST: - raise - - def makefile(self, tarinfo, targetpath): - """Make a file called targetpath. - """ - source = self.extractfile(tarinfo) - try: - with bltn_open(targetpath, "wb") as target: - copyfileobj(source, target) - finally: - source.close() - - def makeunknown(self, tarinfo, targetpath): - """Make a file from a TarInfo object with an unknown type - at targetpath. - """ - self.makefile(tarinfo, targetpath) - self._dbg(1, "tarfile: Unknown file type %r, " \ - "extracted as regular file." % tarinfo.type) - - def makefifo(self, tarinfo, targetpath): - """Make a fifo called targetpath. 
- """ - if hasattr(os, "mkfifo"): - os.mkfifo(targetpath) - else: - raise ExtractError("fifo not supported by system") - - def makedev(self, tarinfo, targetpath): - """Make a character or block device called targetpath. - """ - if not hasattr(os, "mknod") or not hasattr(os, "makedev"): - raise ExtractError("special devices not supported by system") - - mode = tarinfo.mode - if tarinfo.isblk(): - mode |= stat.S_IFBLK - else: - mode |= stat.S_IFCHR - - os.mknod(targetpath, mode, - os.makedev(tarinfo.devmajor, tarinfo.devminor)) - - def makelink(self, tarinfo, targetpath): - """Make a (symbolic) link called targetpath. If it cannot be created - (platform limitation), we try to make a copy of the referenced file - instead of a link. - """ - if hasattr(os, "symlink") and hasattr(os, "link"): - # For systems that support symbolic and hard links. - if tarinfo.issym(): - if os.path.lexists(targetpath): - os.unlink(targetpath) - os.symlink(tarinfo.linkname, targetpath) - else: - # See extract(). - if os.path.exists(tarinfo._link_target): - if os.path.lexists(targetpath): - os.unlink(targetpath) - os.link(tarinfo._link_target, targetpath) - else: - self._extract_member(self._find_link_target(tarinfo), targetpath) - else: - try: - self._extract_member(self._find_link_target(tarinfo), targetpath) - except KeyError: - raise ExtractError("unable to resolve link inside archive") - - def chown(self, tarinfo, targetpath): - """Set owner of targetpath according to tarinfo. - """ - if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: - # We have to be root to do so. 
- try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: - g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: - u = tarinfo.uid - try: - if tarinfo.issym() and hasattr(os, "lchown"): - os.lchown(targetpath, u, g) - else: - if sys.platform != "os2emx": - os.chown(targetpath, u, g) - except EnvironmentError, e: - raise ExtractError("could not change owner") - - def chmod(self, tarinfo, targetpath): - """Set file permissions of targetpath according to tarinfo. - """ - if hasattr(os, 'chmod'): - try: - os.chmod(targetpath, tarinfo.mode) - except EnvironmentError, e: - raise ExtractError("could not change mode") - - def utime(self, tarinfo, targetpath): - """Set modification time of targetpath according to tarinfo. - """ - if not hasattr(os, 'utime'): - return - try: - os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except EnvironmentError, e: - raise ExtractError("could not change modification time") - - #-------------------------------------------------------------------------- - def next(self): - """Return the next member of the archive as a TarInfo object, when - TarFile is opened for reading. Return None if there is no more - available. - """ - self._check("ra") - if self.firstmember is not None: - m = self.firstmember - self.firstmember = None - return m - - # Advance the file pointer. - if self.offset != self.fileobj.tell(): - self.fileobj.seek(self.offset - 1) - if not self.fileobj.read(1): - raise ReadError("unexpected end of data") - - # Read the next block. 
- tarinfo = None - while True: - try: - tarinfo = self.tarinfo.fromtarfile(self) - except EOFHeaderError, e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - except InvalidHeaderError, e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - elif self.offset == 0: - raise ReadError(str(e)) - except EmptyHeaderError: - if self.offset == 0: - raise ReadError("empty file") - except TruncatedHeaderError, e: - if self.offset == 0: - raise ReadError(str(e)) - except SubsequentHeaderError, e: - raise ReadError(str(e)) - break - - if tarinfo is not None: - self.members.append(tarinfo) - else: - self._loaded = True - - return tarinfo - - #-------------------------------------------------------------------------- - # Little helper methods: - - def _getmember(self, name, tarinfo=None, normalize=False): - """Find an archive member by name from bottom to top. - If tarinfo is given, it is used as the starting point. - """ - # Ensure that all members have been loaded. - members = self.getmembers() - - # Limit the member search list up to tarinfo. - if tarinfo is not None: - members = members[:members.index(tarinfo)] - - if normalize: - name = os.path.normpath(name) - - for member in reversed(members): - if normalize: - member_name = os.path.normpath(member.name) - else: - member_name = member.name - - if name == member_name: - return member - - def _load(self): - """Read through the entire archive file and look for readable - members. - """ - while True: - tarinfo = self.next() - if tarinfo is None: - break - self._loaded = True - - def _check(self, mode=None): - """Check if TarFile is still open, and if the operation's mode - corresponds to TarFile's mode. 
- """ - if self.closed: - raise IOError("%s is closed" % self.__class__.__name__) - if mode is not None and self.mode not in mode: - raise IOError("bad operation for mode %r" % self.mode) - - def _find_link_target(self, tarinfo): - """Find the target member of a symlink or hardlink member in the - archive. - """ - if tarinfo.issym(): - # Always search the entire archive. - linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) - limit = None - else: - # Search the archive before the link, because a hard link is - # just a reference to an already archived file. - linkname = tarinfo.linkname - limit = tarinfo - - member = self._getmember(linkname, tarinfo=limit, normalize=True) - if member is None: - raise KeyError("linkname %r not found" % linkname) - return member - - def __iter__(self): - """Provide an iterator object. - """ - if self._loaded: - return iter(self.members) - else: - return TarIter(self) - - def _dbg(self, level, msg): - """Write debugging output to sys.stderr. - """ - if level <= self.debug: - print >> sys.stderr, msg - - def __enter__(self): - self._check() - return self - - def __exit__(self, type, value, traceback): - if type is None: - self.close() - else: - # An exception occurred. We must not call close() because - # it would try to write end-of-archive blocks and padding. - if not self._extfileobj: - self.fileobj.close() - self.closed = True -# class TarFile - -class TarIter: - """Iterator Class. - - for tarinfo in TarFile(...): - suite... - """ - - def __init__(self, tarfile): - """Construct a TarIter object. - """ - self.tarfile = tarfile - self.index = 0 - def __iter__(self): - """Return iterator object. - """ - return self - def next(self): - """Return the next item using TarFile's next() method. - When all members have been read, set TarFile as _loaded. 
- """ - # Fix for SF #1100429: Under rare circumstances it can - # happen that getmembers() is called during iteration, - # which will cause TarIter to stop prematurely. - - if self.index == 0 and self.tarfile.firstmember is not None: - tarinfo = self.tarfile.next() - elif self.index < len(self.tarfile.members): - tarinfo = self.tarfile.members[self.index] - elif not self.tarfile._loaded: - tarinfo = self.tarfile.next() - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration - else: - raise StopIteration - self.index += 1 - return tarinfo - -# Helper classes for sparse file support -class _section: - """Base class for _data and _hole. - """ - def __init__(self, offset, size): - self.offset = offset - self.size = size - def __contains__(self, offset): - return self.offset <= offset < self.offset + self.size - -class _data(_section): - """Represent a data section in a sparse file. - """ - def __init__(self, offset, size, realpos): - _section.__init__(self, offset, size) - self.realpos = realpos - -class _hole(_section): - """Represent a hole section in a sparse file. - """ - pass - -class _ringbuffer(list): - """Ringbuffer class which increases performance - over a regular list. - """ - def __init__(self): - self.idx = 0 - def find(self, offset): - idx = self.idx - while True: - item = self[idx] - if offset in item: - break - idx += 1 - if idx == len(self): - idx = 0 - if idx == self.idx: - # End of File - return None - self.idx = idx - return item - -#--------------------------------------------- -# zipfile compatible TarFile class -#--------------------------------------------- -TAR_PLAIN = 0 # zipfile.ZIP_STORED -TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED -class TarFileCompat: - """TarFile class compatible with standard module zipfile's - ZipFile class. 
- """ - def __init__(self, file, mode="r", compression=TAR_PLAIN): - from warnings import warnpy3k - warnpy3k("the TarFileCompat class has been removed in Python 3.0", - stacklevel=2) - if compression == TAR_PLAIN: - self.tarfile = TarFile.taropen(file, mode) - elif compression == TAR_GZIPPED: - self.tarfile = TarFile.gzopen(file, mode) - else: - raise ValueError("unknown compression constant") - if mode[0:1] == "r": - members = self.tarfile.getmembers() - for m in members: - m.filename = m.name - m.file_size = m.size - m.date_time = time.gmtime(m.mtime)[:6] - def namelist(self): - return map(lambda m: m.name, self.infolist()) - def infolist(self): - return filter(lambda m: m.type in REGULAR_TYPES, - self.tarfile.getmembers()) - def printdir(self): - self.tarfile.list() - def testzip(self): - return - def getinfo(self, name): - return self.tarfile.getmember(name) - def read(self, name): - return self.tarfile.extractfile(self.tarfile.getmember(name)).read() - def write(self, filename, arcname=None, compress_type=None): - self.tarfile.add(filename, arcname) - def writestr(self, zinfo, bytes): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - import calendar - tinfo = TarInfo(zinfo.filename) - tinfo.size = len(bytes) - tinfo.mtime = calendar.timegm(zinfo.date_time) - self.tarfile.addfile(tinfo, StringIO(bytes)) - def close(self): - self.tarfile.close() -#class TarFileCompat - -#-------------------- -# exported functions -#-------------------- -def is_tarfile(name): - """Return True if name points to a tar archive that we - are able to handle, else return False. - """ - try: - t = open(name) - t.close() - return True - except TarError: - return False - -open = TarFile.open diff --git a/python/Lib/telnetlib.py b/python/Lib/telnetlib.py deleted file mode 100755 index 2eaa8e3709..0000000000 --- a/python/Lib/telnetlib.py +++ /dev/null @@ -1,791 +0,0 @@ -r"""TELNET client class. 
- -Based on RFC 854: TELNET Protocol Specification, by J. Postel and -J. Reynolds - -Example: - ->>> from telnetlib import Telnet ->>> tn = Telnet('www.python.org', 79) # connect to finger port ->>> tn.write('guido\r\n') ->>> print tn.read_all() -Login Name TTY Idle When Where -guido Guido van Rossum pts/2 snag.cnri.reston.. - ->>> - -Note that read_all() won't read until eof -- it just reads some data --- but it guarantees to read at least one byte unless EOF is hit. - -It is possible to pass a Telnet object to select.select() in order to -wait until more data is available. Note that in this case, -read_eager() may return '' even if there was data on the socket, -because the protocol negotiation may have eaten the data. This is why -EOFError is needed in some cases to distinguish between "no data" and -"connection closed" (since the socket also appears ready for reading -when it is closed). - -To do: -- option negotiation -- timeout should be intrinsic to the connection object instead of an - option on one of the read calls only - -""" - - -# Imported modules -import errno -import sys -import socket -import select - -__all__ = ["Telnet"] - -# Tunable parameters -DEBUGLEVEL = 0 - -# Telnet protocol defaults -TELNET_PORT = 23 - -# Telnet protocol characters (don't change) -IAC = chr(255) # "Interpret As Command" -DONT = chr(254) -DO = chr(253) -WONT = chr(252) -WILL = chr(251) -theNULL = chr(0) - -SE = chr(240) # Subnegotiation End -NOP = chr(241) # No Operation -DM = chr(242) # Data Mark -BRK = chr(243) # Break -IP = chr(244) # Interrupt process -AO = chr(245) # Abort output -AYT = chr(246) # Are You There -EC = chr(247) # Erase Character -EL = chr(248) # Erase Line -GA = chr(249) # Go Ahead -SB = chr(250) # Subnegotiation Begin - - -# Telnet protocol options code (don't change) -# These ones all come from arpa/telnet.h -BINARY = chr(0) # 8-bit data path -ECHO = chr(1) # echo -RCP = chr(2) # prepare to reconnect -SGA = chr(3) # suppress go ahead -NAMS = chr(4) # 
approximate message size -STATUS = chr(5) # give status -TM = chr(6) # timing mark -RCTE = chr(7) # remote controlled transmission and echo -NAOL = chr(8) # negotiate about output line width -NAOP = chr(9) # negotiate about output page size -NAOCRD = chr(10) # negotiate about CR disposition -NAOHTS = chr(11) # negotiate about horizontal tabstops -NAOHTD = chr(12) # negotiate about horizontal tab disposition -NAOFFD = chr(13) # negotiate about formfeed disposition -NAOVTS = chr(14) # negotiate about vertical tab stops -NAOVTD = chr(15) # negotiate about vertical tab disposition -NAOLFD = chr(16) # negotiate about output LF disposition -XASCII = chr(17) # extended ascii character set -LOGOUT = chr(18) # force logout -BM = chr(19) # byte macro -DET = chr(20) # data entry terminal -SUPDUP = chr(21) # supdup protocol -SUPDUPOUTPUT = chr(22) # supdup output -SNDLOC = chr(23) # send location -TTYPE = chr(24) # terminal type -EOR = chr(25) # end or record -TUID = chr(26) # TACACS user identification -OUTMRK = chr(27) # output marking -TTYLOC = chr(28) # terminal location number -VT3270REGIME = chr(29) # 3270 regime -X3PAD = chr(30) # X.3 PAD -NAWS = chr(31) # window size -TSPEED = chr(32) # terminal speed -LFLOW = chr(33) # remote flow control -LINEMODE = chr(34) # Linemode option -XDISPLOC = chr(35) # X Display Location -OLD_ENVIRON = chr(36) # Old - Environment variables -AUTHENTICATION = chr(37) # Authenticate -ENCRYPT = chr(38) # Encryption option -NEW_ENVIRON = chr(39) # New - Environment variables -# the following ones come from -# http://www.iana.org/assignments/telnet-options -# Unfortunately, that document does not assign identifiers -# to all of them, so we are making them up -TN3270E = chr(40) # TN3270E -XAUTH = chr(41) # XAUTH -CHARSET = chr(42) # CHARSET -RSP = chr(43) # Telnet Remote Serial Port -COM_PORT_OPTION = chr(44) # Com Port Control Option -SUPPRESS_LOCAL_ECHO = chr(45) # Telnet Suppress Local Echo -TLS = chr(46) # Telnet Start TLS -KERMIT = chr(47) # 
KERMIT -SEND_URL = chr(48) # SEND-URL -FORWARD_X = chr(49) # FORWARD_X -PRAGMA_LOGON = chr(138) # TELOPT PRAGMA LOGON -SSPI_LOGON = chr(139) # TELOPT SSPI LOGON -PRAGMA_HEARTBEAT = chr(140) # TELOPT PRAGMA HEARTBEAT -EXOPL = chr(255) # Extended-Options-List -NOOPT = chr(0) - -class Telnet: - - """Telnet interface class. - - An instance of this class represents a connection to a telnet - server. The instance is initially not connected; the open() - method must be used to establish a connection. Alternatively, the - host name and optional port number can be passed to the - constructor, too. - - Don't try to reopen an already connected instance. - - This class has many read_*() methods. Note that some of them - raise EOFError when the end of the connection is read, because - they can return an empty string for other reasons. See the - individual doc strings. - - read_until(expected, [timeout]) - Read until the expected string has been seen, or a timeout is - hit (default is no timeout); may block. - - read_all() - Read all data until EOF; may block. - - read_some() - Read at least one byte or EOF; may block. - - read_very_eager() - Read all data available already queued or on the socket, - without blocking. - - read_eager() - Read either data already queued or some data available on the - socket, without blocking. - - read_lazy() - Read all data in the raw queue (processing it first), without - doing any socket I/O. - - read_very_lazy() - Reads all data in the cooked queue, without doing any socket - I/O. - - read_sb_data() - Reads available data between SB ... SE sequence. Don't block. - - set_option_negotiation_callback(callback) - Each time a telnet option is read on the input flow, this callback - (if set) is called with the following parameters : - callback(telnet socket, command, option) - option will be chr(0) when there is no option. - No other action is done afterwards by telnetlib. 
- - """ - - def __init__(self, host=None, port=0, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - """Constructor. - - When called without arguments, create an unconnected instance. - With a hostname argument, it connects the instance; port number - and timeout are optional. - """ - self.debuglevel = DEBUGLEVEL - self.host = host - self.port = port - self.timeout = timeout - self.sock = None - self.rawq = '' - self.irawq = 0 - self.cookedq = '' - self.eof = 0 - self.iacseq = '' # Buffer for IAC sequence. - self.sb = 0 # flag for SB and SE sequence. - self.sbdataq = '' - self.option_callback = None - self._has_poll = hasattr(select, 'poll') - if host is not None: - self.open(host, port, timeout) - - def open(self, host, port=0, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - """Connect to a host. - - The optional second argument is the port number, which - defaults to the standard telnet port (23). - - Don't try to reopen an already connected instance. - """ - self.eof = 0 - if not port: - port = TELNET_PORT - self.host = host - self.port = port - self.timeout = timeout - self.sock = socket.create_connection((host, port), timeout) - - def __del__(self): - """Destructor -- close the connection.""" - self.close() - - def msg(self, msg, *args): - """Print a debug message, when the debug level is > 0. - - If extra arguments are present, they are substituted in the - message using the standard string formatting operator. - - """ - if self.debuglevel > 0: - print 'Telnet(%s,%s):' % (self.host, self.port), - if args: - print msg % args - else: - print msg - - def set_debuglevel(self, debuglevel): - """Set the debug level. - - The higher it is, the more debug output you get (on sys.stdout). 
- - """ - self.debuglevel = debuglevel - - def close(self): - """Close the connection.""" - sock = self.sock - self.sock = 0 - self.eof = 1 - self.iacseq = '' - self.sb = 0 - if sock: - sock.close() - - def get_socket(self): - """Return the socket object used internally.""" - return self.sock - - def fileno(self): - """Return the fileno() of the socket object used internally.""" - return self.sock.fileno() - - def write(self, buffer): - """Write a string to the socket, doubling any IAC characters. - - Can block if the connection is blocked. May raise - socket.error if the connection is closed. - - """ - if IAC in buffer: - buffer = buffer.replace(IAC, IAC+IAC) - self.msg("send %r", buffer) - self.sock.sendall(buffer) - - def read_until(self, match, timeout=None): - """Read until a given string is encountered or until timeout. - - When no match is found, return whatever is available instead, - possibly the empty string. Raise EOFError if the connection - is closed and no cooked data is available. - - """ - if self._has_poll: - return self._read_until_with_poll(match, timeout) - else: - return self._read_until_with_select(match, timeout) - - def _read_until_with_poll(self, match, timeout): - """Read until a given string is encountered or until timeout. - - This method uses select.poll() to implement the timeout. - """ - n = len(match) - call_timeout = timeout - if timeout is not None: - from time import time - time_start = time() - self.process_rawq() - i = self.cookedq.find(match) - if i < 0: - poller = select.poll() - poll_in_or_priority_flags = select.POLLIN | select.POLLPRI - poller.register(self, poll_in_or_priority_flags) - while i < 0 and not self.eof: - try: - # Poll takes its timeout in milliseconds. 
- ready = poller.poll(None if timeout is None - else 1000 * call_timeout) - except select.error as e: - if e.errno == errno.EINTR: - if timeout is not None: - elapsed = time() - time_start - call_timeout = timeout-elapsed - continue - raise - for fd, mode in ready: - if mode & poll_in_or_priority_flags: - i = max(0, len(self.cookedq)-n) - self.fill_rawq() - self.process_rawq() - i = self.cookedq.find(match, i) - if timeout is not None: - elapsed = time() - time_start - if elapsed >= timeout: - break - call_timeout = timeout-elapsed - poller.unregister(self) - if i >= 0: - i = i + n - buf = self.cookedq[:i] - self.cookedq = self.cookedq[i:] - return buf - return self.read_very_lazy() - - def _read_until_with_select(self, match, timeout=None): - """Read until a given string is encountered or until timeout. - - The timeout is implemented using select.select(). - """ - n = len(match) - self.process_rawq() - i = self.cookedq.find(match) - if i >= 0: - i = i+n - buf = self.cookedq[:i] - self.cookedq = self.cookedq[i:] - return buf - s_reply = ([self], [], []) - s_args = s_reply - if timeout is not None: - s_args = s_args + (timeout,) - from time import time - time_start = time() - while not self.eof and select.select(*s_args) == s_reply: - i = max(0, len(self.cookedq)-n) - self.fill_rawq() - self.process_rawq() - i = self.cookedq.find(match, i) - if i >= 0: - i = i+n - buf = self.cookedq[:i] - self.cookedq = self.cookedq[i:] - return buf - if timeout is not None: - elapsed = time() - time_start - if elapsed >= timeout: - break - s_args = s_reply + (timeout-elapsed,) - return self.read_very_lazy() - - def read_all(self): - """Read all data until EOF; block until connection closed.""" - self.process_rawq() - while not self.eof: - self.fill_rawq() - self.process_rawq() - buf = self.cookedq - self.cookedq = '' - return buf - - def read_some(self): - """Read at least one byte of cooked data unless EOF is hit. - - Return '' if EOF is hit. 
Block if no data is immediately - available. - - """ - self.process_rawq() - while not self.cookedq and not self.eof: - self.fill_rawq() - self.process_rawq() - buf = self.cookedq - self.cookedq = '' - return buf - - def read_very_eager(self): - """Read everything that's possible without blocking in I/O (eager). - - Raise EOFError if connection closed and no cooked data - available. Return '' if no cooked data available otherwise. - Don't block unless in the midst of an IAC sequence. - - """ - self.process_rawq() - while not self.eof and self.sock_avail(): - self.fill_rawq() - self.process_rawq() - return self.read_very_lazy() - - def read_eager(self): - """Read readily available data. - - Raise EOFError if connection closed and no cooked data - available. Return '' if no cooked data available otherwise. - Don't block unless in the midst of an IAC sequence. - - """ - self.process_rawq() - while not self.cookedq and not self.eof and self.sock_avail(): - self.fill_rawq() - self.process_rawq() - return self.read_very_lazy() - - def read_lazy(self): - """Process and return data that's already in the queues (lazy). - - Raise EOFError if connection closed and no data available. - Return '' if no cooked data available otherwise. Don't block - unless in the midst of an IAC sequence. - - """ - self.process_rawq() - return self.read_very_lazy() - - def read_very_lazy(self): - """Return any data available in the cooked queue (very lazy). - - Raise EOFError if connection closed and no data available. - Return '' if no cooked data available otherwise. Don't block. - - """ - buf = self.cookedq - self.cookedq = '' - if not buf and self.eof and not self.rawq: - raise EOFError, 'telnet connection closed' - return buf - - def read_sb_data(self): - """Return any data available in the SB ... SE queue. - - Return '' if no SB ... SE available. Should only be called - after seeing a SB or SE command. When a new SB command is - found, old unread SB data will be discarded. Don't block. 
- - """ - buf = self.sbdataq - self.sbdataq = '' - return buf - - def set_option_negotiation_callback(self, callback): - """Provide a callback function called after each receipt of a telnet option.""" - self.option_callback = callback - - def process_rawq(self): - """Transfer from raw queue to cooked queue. - - Set self.eof when connection is closed. Don't block unless in - the midst of an IAC sequence. - - """ - buf = ['', ''] - try: - while self.rawq: - c = self.rawq_getchar() - if not self.iacseq: - if c == theNULL: - continue - if c == "\021": - continue - if c != IAC: - buf[self.sb] = buf[self.sb] + c - continue - else: - self.iacseq += c - elif len(self.iacseq) == 1: - # 'IAC: IAC CMD [OPTION only for WILL/WONT/DO/DONT]' - if c in (DO, DONT, WILL, WONT): - self.iacseq += c - continue - - self.iacseq = '' - if c == IAC: - buf[self.sb] = buf[self.sb] + c - else: - if c == SB: # SB ... SE start. - self.sb = 1 - self.sbdataq = '' - elif c == SE: - self.sb = 0 - self.sbdataq = self.sbdataq + buf[1] - buf[1] = '' - if self.option_callback: - # Callback is supposed to look into - # the sbdataq - self.option_callback(self.sock, c, NOOPT) - else: - # We can't offer automatic processing of - # suboptions. Alas, we should not get any - # unless we did a WILL/DO before. 
- self.msg('IAC %d not recognized' % ord(c)) - elif len(self.iacseq) == 2: - cmd = self.iacseq[1] - self.iacseq = '' - opt = c - if cmd in (DO, DONT): - self.msg('IAC %s %d', - cmd == DO and 'DO' or 'DONT', ord(opt)) - if self.option_callback: - self.option_callback(self.sock, cmd, opt) - else: - self.sock.sendall(IAC + WONT + opt) - elif cmd in (WILL, WONT): - self.msg('IAC %s %d', - cmd == WILL and 'WILL' or 'WONT', ord(opt)) - if self.option_callback: - self.option_callback(self.sock, cmd, opt) - else: - self.sock.sendall(IAC + DONT + opt) - except EOFError: # raised by self.rawq_getchar() - self.iacseq = '' # Reset on EOF - self.sb = 0 - pass - self.cookedq = self.cookedq + buf[0] - self.sbdataq = self.sbdataq + buf[1] - - def rawq_getchar(self): - """Get next char from raw queue. - - Block if no data is immediately available. Raise EOFError - when connection is closed. - - """ - if not self.rawq: - self.fill_rawq() - if self.eof: - raise EOFError - c = self.rawq[self.irawq] - self.irawq = self.irawq + 1 - if self.irawq >= len(self.rawq): - self.rawq = '' - self.irawq = 0 - return c - - def fill_rawq(self): - """Fill raw queue from exactly one recv() system call. - - Block if no data is immediately available. Set self.eof when - connection is closed. 
- - """ - if self.irawq >= len(self.rawq): - self.rawq = '' - self.irawq = 0 - # The buffer size should be fairly small so as to avoid quadratic - # behavior in process_rawq() above - buf = self.sock.recv(50) - self.msg("recv %r", buf) - self.eof = (not buf) - self.rawq = self.rawq + buf - - def sock_avail(self): - """Test whether data is available on the socket.""" - return select.select([self], [], [], 0) == ([self], [], []) - - def interact(self): - """Interaction function, emulates a very dumb telnet client.""" - if sys.platform == "win32": - self.mt_interact() - return - while 1: - rfd, wfd, xfd = select.select([self, sys.stdin], [], []) - if self in rfd: - try: - text = self.read_eager() - except EOFError: - print '*** Connection closed by remote host ***' - break - if text: - sys.stdout.write(text) - sys.stdout.flush() - if sys.stdin in rfd: - line = sys.stdin.readline() - if not line: - break - self.write(line) - - def mt_interact(self): - """Multithreaded version of interact().""" - import thread - thread.start_new_thread(self.listener, ()) - while 1: - line = sys.stdin.readline() - if not line: - break - self.write(line) - - def listener(self): - """Helper for mt_interact() -- this executes in the other thread.""" - while 1: - try: - data = self.read_eager() - except EOFError: - print '*** Connection closed by remote host ***' - return - if data: - sys.stdout.write(data) - else: - sys.stdout.flush() - - def expect(self, list, timeout=None): - """Read until one from a list of a regular expressions matches. - - The first argument is a list of regular expressions, either - compiled (re.RegexObject instances) or uncompiled (strings). - The optional second argument is a timeout, in seconds; default - is no timeout. - - Return a tuple of three items: the index in the list of the - first regular expression that matches; the match object - returned; and the text read up till and including the match. - - If EOF is read and no text was read, raise EOFError. 
- Otherwise, when nothing matches, return (-1, None, text) where - text is the text received so far (may be the empty string if a - timeout happened). - - If a regular expression ends with a greedy match (e.g. '.*') - or if more than one expression can match the same input, the - results are undeterministic, and may depend on the I/O timing. - - """ - if self._has_poll: - return self._expect_with_poll(list, timeout) - else: - return self._expect_with_select(list, timeout) - - def _expect_with_poll(self, expect_list, timeout=None): - """Read until one from a list of a regular expressions matches. - - This method uses select.poll() to implement the timeout. - """ - re = None - expect_list = expect_list[:] - indices = range(len(expect_list)) - for i in indices: - if not hasattr(expect_list[i], "search"): - if not re: import re - expect_list[i] = re.compile(expect_list[i]) - call_timeout = timeout - if timeout is not None: - from time import time - time_start = time() - self.process_rawq() - m = None - for i in indices: - m = expect_list[i].search(self.cookedq) - if m: - e = m.end() - text = self.cookedq[:e] - self.cookedq = self.cookedq[e:] - break - if not m: - poller = select.poll() - poll_in_or_priority_flags = select.POLLIN | select.POLLPRI - poller.register(self, poll_in_or_priority_flags) - while not m and not self.eof: - try: - ready = poller.poll(None if timeout is None - else 1000 * call_timeout) - except select.error as e: - if e.errno == errno.EINTR: - if timeout is not None: - elapsed = time() - time_start - call_timeout = timeout-elapsed - continue - raise - for fd, mode in ready: - if mode & poll_in_or_priority_flags: - self.fill_rawq() - self.process_rawq() - for i in indices: - m = expect_list[i].search(self.cookedq) - if m: - e = m.end() - text = self.cookedq[:e] - self.cookedq = self.cookedq[e:] - break - if timeout is not None: - elapsed = time() - time_start - if elapsed >= timeout: - break - call_timeout = timeout-elapsed - poller.unregister(self) 
- if m: - return (i, m, text) - text = self.read_very_lazy() - if not text and self.eof: - raise EOFError - return (-1, None, text) - - def _expect_with_select(self, list, timeout=None): - """Read until one from a list of a regular expressions matches. - - The timeout is implemented using select.select(). - """ - re = None - list = list[:] - indices = range(len(list)) - for i in indices: - if not hasattr(list[i], "search"): - if not re: import re - list[i] = re.compile(list[i]) - if timeout is not None: - from time import time - time_start = time() - while 1: - self.process_rawq() - for i in indices: - m = list[i].search(self.cookedq) - if m: - e = m.end() - text = self.cookedq[:e] - self.cookedq = self.cookedq[e:] - return (i, m, text) - if self.eof: - break - if timeout is not None: - elapsed = time() - time_start - if elapsed >= timeout: - break - s_args = ([self.fileno()], [], [], timeout-elapsed) - r, w, x = select.select(*s_args) - if not r: - break - self.fill_rawq() - text = self.read_very_lazy() - if not text and self.eof: - raise EOFError - return (-1, None, text) - - -def test(): - """Test program for telnetlib. - - Usage: python telnetlib.py [-d] ... [host [port]] - - Default host is localhost; default port is 23. - - """ - debuglevel = 0 - while sys.argv[1:] and sys.argv[1] == '-d': - debuglevel = debuglevel+1 - del sys.argv[1] - host = 'localhost' - if sys.argv[1:]: - host = sys.argv[1] - port = 0 - if sys.argv[2:]: - portstr = sys.argv[2] - try: - port = int(portstr) - except ValueError: - port = socket.getservbyname(portstr, 'tcp') - tn = Telnet() - tn.set_debuglevel(debuglevel) - tn.open(host, port, timeout=0.5) - tn.interact() - tn.close() - -if __name__ == '__main__': - test() diff --git a/python/Lib/tempfile.py b/python/Lib/tempfile.py deleted file mode 100755 index 7e3b25a070..0000000000 --- a/python/Lib/tempfile.py +++ /dev/null @@ -1,640 +0,0 @@ -"""Temporary files. 
- -This module provides generic, low- and high-level interfaces for -creating temporary files and directories. All of the interfaces -provided by this module can be used without fear of race conditions -except for 'mktemp'. 'mktemp' is subject to race conditions and -should not be used; it is provided for backward compatibility only. - -This module also provides some data items to the user: - - TMP_MAX - maximum number of names that will be tried before - giving up. - template - the default prefix for all temporary names. - You may change this to control the default prefix. - tempdir - If this is set to a string before the first use of - any routine from this module, it will be considered as - another candidate location to store temporary files. -""" - -__all__ = [ - "NamedTemporaryFile", "TemporaryFile", # high level safe interfaces - "SpooledTemporaryFile", - "mkstemp", "mkdtemp", # low level safe interfaces - "mktemp", # deprecated unsafe interface - "TMP_MAX", "gettempprefix", # constants - "tempdir", "gettempdir" - ] - - -# Imports. 
- -import io as _io -import os as _os -import errno as _errno -from random import Random as _Random - -try: - from cStringIO import StringIO as _StringIO -except ImportError: - from StringIO import StringIO as _StringIO - -try: - import fcntl as _fcntl -except ImportError: - def _set_cloexec(fd): - pass -else: - def _set_cloexec(fd): - try: - flags = _fcntl.fcntl(fd, _fcntl.F_GETFD, 0) - except IOError: - pass - else: - # flags read successfully, modify - flags |= _fcntl.FD_CLOEXEC - _fcntl.fcntl(fd, _fcntl.F_SETFD, flags) - - -try: - import thread as _thread -except ImportError: - import dummy_thread as _thread -_allocate_lock = _thread.allocate_lock - -_text_openflags = _os.O_RDWR | _os.O_CREAT | _os.O_EXCL -if hasattr(_os, 'O_NOINHERIT'): - _text_openflags |= _os.O_NOINHERIT -if hasattr(_os, 'O_NOFOLLOW'): - _text_openflags |= _os.O_NOFOLLOW - -_bin_openflags = _text_openflags -if hasattr(_os, 'O_BINARY'): - _bin_openflags |= _os.O_BINARY - -if hasattr(_os, 'TMP_MAX'): - TMP_MAX = _os.TMP_MAX -else: - TMP_MAX = 10000 - -template = "tmp" - -# Internal routines. - -_once_lock = _allocate_lock() - -if hasattr(_os, "lstat"): - _stat = _os.lstat -elif hasattr(_os, "stat"): - _stat = _os.stat -else: - # Fallback. All we need is something that raises os.error if the - # file doesn't exist. - def _stat(fn): - try: - f = open(fn) - except IOError: - raise _os.error - f.close() - -def _exists(fn): - try: - _stat(fn) - except _os.error: - return False - else: - return True - -class _RandomNameSequence: - """An instance of _RandomNameSequence generates an endless - sequence of unpredictable strings which can safely be incorporated - into file names. Each string is six characters long. Multiple - threads can safely use the same instance at the same time. 
- - _RandomNameSequence is an iterator.""" - - characters = ("abcdefghijklmnopqrstuvwxyz" + - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + - "0123456789_") - - def __init__(self): - self.mutex = _allocate_lock() - self.normcase = _os.path.normcase - - @property - def rng(self): - cur_pid = _os.getpid() - if cur_pid != getattr(self, '_rng_pid', None): - self._rng = _Random() - self._rng_pid = cur_pid - return self._rng - - def __iter__(self): - return self - - def next(self): - m = self.mutex - c = self.characters - choose = self.rng.choice - - m.acquire() - try: - letters = [choose(c) for dummy in "123456"] - finally: - m.release() - - return self.normcase(''.join(letters)) - -def _candidate_tempdir_list(): - """Generate a list of candidate temporary directories which - _get_default_tempdir will try.""" - - dirlist = [] - - # First, try the environment. - for envname in 'TMPDIR', 'TEMP', 'TMP': - dirname = _os.getenv(envname) - if dirname: dirlist.append(dirname) - - # Failing that, try OS-specific locations. - if _os.name == 'riscos': - dirname = _os.getenv('Wimp$ScrapDir') - if dirname: dirlist.append(dirname) - elif _os.name == 'nt': - dirlist.extend([ r'c:\temp', r'c:\tmp', r'\temp', r'\tmp' ]) - else: - dirlist.extend([ '/tmp', '/var/tmp', '/usr/tmp' ]) - - # As a last resort, the current directory. - try: - dirlist.append(_os.getcwd()) - except (AttributeError, _os.error): - dirlist.append(_os.curdir) - - return dirlist - -def _get_default_tempdir(): - """Calculate the default directory to use for temporary files. - This routine should be called exactly once. - - We determine whether or not a candidate temp dir is usable by - trying to create and write to a file in that directory. If this - is successful, the test file is deleted. 
To prevent denial of - service, the name of the test file must be randomized.""" - - namer = _RandomNameSequence() - dirlist = _candidate_tempdir_list() - flags = _text_openflags - - for dir in dirlist: - if dir != _os.curdir: - dir = _os.path.normcase(_os.path.abspath(dir)) - # Try only a few names per directory. - for seq in xrange(100): - name = namer.next() - filename = _os.path.join(dir, name) - try: - fd = _os.open(filename, flags, 0o600) - try: - try: - with _io.open(fd, 'wb', closefd=False) as fp: - fp.write(b'blat') - finally: - _os.close(fd) - finally: - _os.unlink(filename) - return dir - except (OSError, IOError) as e: - if e.args[0] == _errno.EEXIST: - continue - if (_os.name == 'nt' and e.args[0] == _errno.EACCES and - _os.path.isdir(dir) and _os.access(dir, _os.W_OK)): - # On windows, when a directory with the chosen name already - # exists, EACCES error code is returned instead of EEXIST. - continue - break # no point trying more names in this directory - raise IOError, (_errno.ENOENT, - ("No usable temporary directory found in %s" % dirlist)) - -_name_sequence = None - -def _get_candidate_names(): - """Common setup sequence for all user-callable interfaces.""" - - global _name_sequence - if _name_sequence is None: - _once_lock.acquire() - try: - if _name_sequence is None: - _name_sequence = _RandomNameSequence() - finally: - _once_lock.release() - return _name_sequence - - -def _mkstemp_inner(dir, pre, suf, flags): - """Code common to mkstemp, TemporaryFile, and NamedTemporaryFile.""" - - names = _get_candidate_names() - - for seq in xrange(TMP_MAX): - name = names.next() - file = _os.path.join(dir, pre + name + suf) - try: - fd = _os.open(file, flags, 0600) - _set_cloexec(fd) - return (fd, _os.path.abspath(file)) - except OSError, e: - if e.errno == _errno.EEXIST: - continue # try again - if (_os.name == 'nt' and e.errno == _errno.EACCES and - _os.path.isdir(dir) and _os.access(dir, _os.W_OK)): - # On windows, when a directory with the chosen name 
already - # exists, EACCES error code is returned instead of EEXIST. - continue - raise - - raise IOError, (_errno.EEXIST, "No usable temporary file name found") - - -# User visible interfaces. - -def gettempprefix(): - """Accessor for tempdir.template.""" - return template - -tempdir = None - -def gettempdir(): - """Accessor for tempfile.tempdir.""" - global tempdir - if tempdir is None: - _once_lock.acquire() - try: - if tempdir is None: - tempdir = _get_default_tempdir() - finally: - _once_lock.release() - return tempdir - -def mkstemp(suffix="", prefix=template, dir=None, text=False): - """User-callable function to create and return a unique temporary - file. The return value is a pair (fd, name) where fd is the - file descriptor returned by os.open, and name is the filename. - - If 'suffix' is specified, the file name will end with that suffix, - otherwise there will be no suffix. - - If 'prefix' is specified, the file name will begin with that prefix, - otherwise a default prefix is used. - - If 'dir' is specified, the file will be created in that directory, - otherwise a default directory is used. - - If 'text' is specified and true, the file is opened in text - mode. Else (the default) the file is opened in binary mode. On - some operating systems, this makes no difference. - - The file is readable and writable only by the creating user ID. - If the operating system uses permission bits to indicate whether a - file is executable, the file is executable by no one. The file - descriptor is not inherited by children of this process. - - Caller is responsible for deleting the file when done with it. - """ - - if dir is None: - dir = gettempdir() - - if text: - flags = _text_openflags - else: - flags = _bin_openflags - - return _mkstemp_inner(dir, prefix, suffix, flags) - - -def mkdtemp(suffix="", prefix=template, dir=None): - """User-callable function to create and return a unique temporary - directory. The return value is the pathname of the directory. 
- - Arguments are as for mkstemp, except that the 'text' argument is - not accepted. - - The directory is readable, writable, and searchable only by the - creating user. - - Caller is responsible for deleting the directory when done with it. - """ - - if dir is None: - dir = gettempdir() - - names = _get_candidate_names() - - for seq in xrange(TMP_MAX): - name = names.next() - file = _os.path.join(dir, prefix + name + suffix) - try: - _os.mkdir(file, 0700) - return file - except OSError, e: - if e.errno == _errno.EEXIST: - continue # try again - if (_os.name == 'nt' and e.errno == _errno.EACCES and - _os.path.isdir(dir) and _os.access(dir, _os.W_OK)): - # On windows, when a directory with the chosen name already - # exists, EACCES error code is returned instead of EEXIST. - continue - raise - - raise IOError, (_errno.EEXIST, "No usable temporary directory name found") - -def mktemp(suffix="", prefix=template, dir=None): - """User-callable function to return a unique temporary file name. The - file is not created. - - Arguments are as for mkstemp, except that the 'text' argument is - not accepted. - - This function is unsafe and should not be used. The file name - refers to a file that did not exist at some point, but by the time - you get around to creating it, someone else may have beaten you to - the punch. - """ - -## from warnings import warn as _warn -## _warn("mktemp is a potential security risk to your program", -## RuntimeWarning, stacklevel=2) - - if dir is None: - dir = gettempdir() - - names = _get_candidate_names() - for seq in xrange(TMP_MAX): - name = names.next() - file = _os.path.join(dir, prefix + name + suffix) - if not _exists(file): - return file - - raise IOError, (_errno.EEXIST, "No usable temporary filename found") - - -class _TemporaryFileWrapper: - """Temporary file wrapper - - This class provides a wrapper around files opened for - temporary use. In particular, it seeks to automatically - remove the file when it is no longer needed. 
- """ - - def __init__(self, file, name, delete=True): - self.file = file - self.name = name - self.close_called = False - self.delete = delete - - def __getattr__(self, name): - # Attribute lookups are delegated to the underlying file - # and cached for non-numeric results - # (i.e. methods are cached, closed and friends are not) - file = self.__dict__['file'] - a = getattr(file, name) - if not issubclass(type(a), type(0)): - setattr(self, name, a) - return a - - # The underlying __enter__ method returns the wrong object - # (self.file) so override it to return the wrapper - def __enter__(self): - self.file.__enter__() - return self - - # NT provides delete-on-close as a primitive, so we don't need - # the wrapper to do anything special. We still use it so that - # file.name is useful (i.e. not "(fdopen)") with NamedTemporaryFile. - if _os.name != 'nt': - # Cache the unlinker so we don't get spurious errors at - # shutdown when the module-level "os" is None'd out. Note - # that this must be referenced as self.unlink, because the - # name TemporaryFileWrapper may also get None'd out before - # __del__ is called. - unlink = _os.unlink - - def close(self): - if not self.close_called: - self.close_called = True - try: - self.file.close() - finally: - if self.delete: - self.unlink(self.name) - - def __del__(self): - self.close() - - # Need to trap __exit__ as well to ensure the file gets - # deleted when used in a with statement - def __exit__(self, exc, value, tb): - result = self.file.__exit__(exc, value, tb) - self.close() - return result - else: - def __exit__(self, exc, value, tb): - self.file.__exit__(exc, value, tb) - - -def NamedTemporaryFile(mode='w+b', bufsize=-1, suffix="", - prefix=template, dir=None, delete=True): - """Create and return a temporary file. - Arguments: - 'prefix', 'suffix', 'dir' -- as for mkstemp. - 'mode' -- the mode argument to os.fdopen (default "w+b"). - 'bufsize' -- the buffer size argument to os.fdopen (default -1). 
- 'delete' -- whether the file is deleted on close (default True). - The file is created as mkstemp() would do it. - - Returns an object with a file-like interface; the name of the file - is accessible as its 'name' attribute. The file will be automatically - deleted when it is closed unless the 'delete' argument is set to False. - """ - - if dir is None: - dir = gettempdir() - - if 'b' in mode: - flags = _bin_openflags - else: - flags = _text_openflags - - # Setting O_TEMPORARY in the flags causes the OS to delete - # the file when it is closed. This is only supported by Windows. - if _os.name == 'nt' and delete: - flags |= _os.O_TEMPORARY - - (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags) - try: - file = _os.fdopen(fd, mode, bufsize) - return _TemporaryFileWrapper(file, name, delete) - except BaseException: - _os.unlink(name) - _os.close(fd) - raise - -if _os.name != 'posix' or _os.sys.platform == 'cygwin': - # On non-POSIX and Cygwin systems, assume that we cannot unlink a file - # while it is open. - TemporaryFile = NamedTemporaryFile - -else: - def TemporaryFile(mode='w+b', bufsize=-1, suffix="", - prefix=template, dir=None): - """Create and return a temporary file. - Arguments: - 'prefix', 'suffix', 'dir' -- as for mkstemp. - 'mode' -- the mode argument to os.fdopen (default "w+b"). - 'bufsize' -- the buffer size argument to os.fdopen (default -1). - The file is created as mkstemp() would do it. - - Returns an object with a file-like interface. The file has no - name, and will cease to exist when it is closed. 
- """ - - if dir is None: - dir = gettempdir() - - if 'b' in mode: - flags = _bin_openflags - else: - flags = _text_openflags - - (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags) - try: - _os.unlink(name) - return _os.fdopen(fd, mode, bufsize) - except: - _os.close(fd) - raise - -class SpooledTemporaryFile: - """Temporary file wrapper, specialized to switch from - StringIO to a real file when it exceeds a certain size or - when a fileno is needed. - """ - _rolled = False - - def __init__(self, max_size=0, mode='w+b', bufsize=-1, - suffix="", prefix=template, dir=None): - self._file = _StringIO() - self._max_size = max_size - self._rolled = False - self._TemporaryFileArgs = (mode, bufsize, suffix, prefix, dir) - - def _check(self, file): - if self._rolled: return - max_size = self._max_size - if max_size and file.tell() > max_size: - self.rollover() - - def rollover(self): - if self._rolled: return - file = self._file - newfile = self._file = TemporaryFile(*self._TemporaryFileArgs) - del self._TemporaryFileArgs - - newfile.write(file.getvalue()) - newfile.seek(file.tell(), 0) - - self._rolled = True - - # The method caching trick from NamedTemporaryFile - # won't work here, because _file may change from a - # _StringIO instance to a real file. So we list - # all the methods directly. 
- - # Context management protocol - def __enter__(self): - if self._file.closed: - raise ValueError("Cannot enter context with closed file") - return self - - def __exit__(self, exc, value, tb): - self._file.close() - - # file protocol - def __iter__(self): - return self._file.__iter__() - - def close(self): - self._file.close() - - @property - def closed(self): - return self._file.closed - - def fileno(self): - self.rollover() - return self._file.fileno() - - def flush(self): - self._file.flush() - - def isatty(self): - return self._file.isatty() - - @property - def mode(self): - try: - return self._file.mode - except AttributeError: - return self._TemporaryFileArgs[0] - - @property - def name(self): - try: - return self._file.name - except AttributeError: - return None - - def next(self): - return self._file.next - - def read(self, *args): - return self._file.read(*args) - - def readline(self, *args): - return self._file.readline(*args) - - def readlines(self, *args): - return self._file.readlines(*args) - - def seek(self, *args): - self._file.seek(*args) - - @property - def softspace(self): - return self._file.softspace - - def tell(self): - return self._file.tell() - - def truncate(self): - self._file.truncate() - - def write(self, s): - file = self._file - rv = file.write(s) - self._check(file) - return rv - - def writelines(self, iterable): - file = self._file - rv = file.writelines(iterable) - self._check(file) - return rv - - def xreadlines(self, *args): - if hasattr(self._file, 'xreadlines'): # real file - return iter(self._file) - else: # StringIO() - return iter(self._file.readlines(*args)) diff --git a/python/Lib/textwrap.py b/python/Lib/textwrap.py deleted file mode 100755 index 5c2e4fa523..0000000000 --- a/python/Lib/textwrap.py +++ /dev/null @@ -1,429 +0,0 @@ -"""Text wrapping and filling. -""" - -# Copyright (C) 1999-2001 Gregory P. Ward. -# Copyright (C) 2002, 2003 Python Software Foundation. 
-# Written by Greg Ward - -__revision__ = "$Id$" - -import string, re - -try: - _unicode = unicode -except NameError: - # If Python is built without Unicode support, the unicode type - # will not exist. Fake one. - class _unicode(object): - pass - -# Do the right thing with boolean values for all known Python versions -# (so this module can be copied to projects that don't depend on Python -# 2.3, e.g. Optik and Docutils) by uncommenting the block of code below. -#try: -# True, False -#except NameError: -# (True, False) = (1, 0) - -__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent'] - -# Hardcode the recognized whitespace characters to the US-ASCII -# whitespace characters. The main reason for doing this is that in -# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales -# that character winds up in string.whitespace. Respecting -# string.whitespace in those cases would 1) make textwrap treat 0xa0 the -# same as any other whitespace char, which is clearly wrong (it's a -# *non-breaking* space), 2) possibly cause problems with Unicode, -# since 0xa0 is not in range(128). -_whitespace = '\t\n\x0b\x0c\r ' - -class TextWrapper: - """ - Object for wrapping/filling text. The public interface consists of - the wrap() and fill() methods; the other methods are just there for - subclasses to override in order to tweak the default behaviour. - If you want to completely replace the main wrapping algorithm, - you'll probably have to override _wrap_chunks(). - - Several instance attributes control various aspects of wrapping: - width (default: 70) - the maximum width of wrapped lines (unless break_long_words - is false) - initial_indent (default: "") - string that will be prepended to the first line of wrapped - output. Counts towards the line's width. - subsequent_indent (default: "") - string that will be prepended to all lines save the first - of wrapped output; also counts towards each line's width. 
- expand_tabs (default: true) - Expand tabs in input text to spaces before further processing. - Each tab will become 1 .. 8 spaces, depending on its position in - its line. If false, each tab is treated as a single character. - replace_whitespace (default: true) - Replace all whitespace characters in the input text by spaces - after tab expansion. Note that if expand_tabs is false and - replace_whitespace is true, every tab will be converted to a - single space! - fix_sentence_endings (default: false) - Ensure that sentence-ending punctuation is always followed - by two spaces. Off by default because the algorithm is - (unavoidably) imperfect. - break_long_words (default: true) - Break words longer than 'width'. If false, those words will not - be broken, and some lines might be longer than 'width'. - break_on_hyphens (default: true) - Allow breaking hyphenated words. If true, wrapping will occur - preferably on whitespaces and right after hyphens part of - compound words. - drop_whitespace (default: true) - Drop leading and trailing whitespace from lines. - """ - - whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace)) - - unicode_whitespace_trans = {} - uspace = ord(u' ') - for x in map(ord, _whitespace): - unicode_whitespace_trans[x] = uspace - - # This funky little regex is just the trick for splitting - # text up into word-wrappable chunks. E.g. - # "Hello there -- you goof-ball, use the -b option!" - # splits into - # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! - # (after stripping out empty strings). - wordsep_re = re.compile( - r'(\s+|' # any whitespace - r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words - r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash - - # This less funky little regex just split on recognized spaces. E.g. - # "Hello there -- you goof-ball, use the -b option!" 
- # splits into - # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ - wordsep_simple_re = re.compile(r'(\s+)') - - # XXX this is not locale- or charset-aware -- string.lowercase - # is US-ASCII only (and therefore English-only) - sentence_end_re = re.compile(r'[%s]' # lowercase letter - r'[\.\!\?]' # sentence-ending punct. - r'[\"\']?' # optional end-of-quote - r'\Z' # end of chunk - % string.lowercase) - - - def __init__(self, - width=70, - initial_indent="", - subsequent_indent="", - expand_tabs=True, - replace_whitespace=True, - fix_sentence_endings=False, - break_long_words=True, - drop_whitespace=True, - break_on_hyphens=True): - self.width = width - self.initial_indent = initial_indent - self.subsequent_indent = subsequent_indent - self.expand_tabs = expand_tabs - self.replace_whitespace = replace_whitespace - self.fix_sentence_endings = fix_sentence_endings - self.break_long_words = break_long_words - self.drop_whitespace = drop_whitespace - self.break_on_hyphens = break_on_hyphens - - # recompile the regexes for Unicode mode -- done in this clumsy way for - # backwards compatibility because it's rather common to monkey-patch - # the TextWrapper class' wordsep_re attribute. - self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U) - self.wordsep_simple_re_uni = re.compile( - self.wordsep_simple_re.pattern, re.U) - - - # -- Private methods ----------------------------------------------- - # (possibly useful for subclasses to override) - - def _munge_whitespace(self, text): - """_munge_whitespace(text : string) -> string - - Munge whitespace in text: expand tabs and convert all other - whitespace characters to spaces. Eg. " foo\\tbar\\n\\nbaz" - becomes " foo bar baz". 
- """ - if self.expand_tabs: - text = text.expandtabs() - if self.replace_whitespace: - if isinstance(text, str): - text = text.translate(self.whitespace_trans) - elif isinstance(text, _unicode): - text = text.translate(self.unicode_whitespace_trans) - return text - - - def _split(self, text): - """_split(text : string) -> [string] - - Split the text to wrap into indivisible chunks. Chunks are - not quite the same as words; see _wrap_chunks() for full - details. As an example, the text - Look, goof-ball -- use the -b option! - breaks into the following chunks: - 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', - 'use', ' ', 'the', ' ', '-b', ' ', 'option!' - if break_on_hyphens is True, or in: - 'Look,', ' ', 'goof-ball', ' ', '--', ' ', - 'use', ' ', 'the', ' ', '-b', ' ', option!' - otherwise. - """ - if isinstance(text, _unicode): - if self.break_on_hyphens: - pat = self.wordsep_re_uni - else: - pat = self.wordsep_simple_re_uni - else: - if self.break_on_hyphens: - pat = self.wordsep_re - else: - pat = self.wordsep_simple_re - chunks = pat.split(text) - chunks = filter(None, chunks) # remove empty chunks - return chunks - - def _fix_sentence_endings(self, chunks): - """_fix_sentence_endings(chunks : [string]) - - Correct for sentence endings buried in 'chunks'. Eg. when the - original text contains "... foo.\\nBar ...", munge_whitespace() - and split() will convert that to [..., "foo.", " ", "Bar", ...] - which has one too few spaces; this method simply changes the one - space to two. - """ - i = 0 - patsearch = self.sentence_end_re.search - while i < len(chunks)-1: - if chunks[i+1] == " " and patsearch(chunks[i]): - chunks[i+1] = " " - i += 2 - else: - i += 1 - - def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): - """_handle_long_word(chunks : [string], - cur_line : [string], - cur_len : int, width : int) - - Handle a chunk of text (most likely a word, not whitespace) that - is too long to fit in any line. 
- """ - # Figure out when indent is larger than the specified width, and make - # sure at least one character is stripped off on every pass - if width < 1: - space_left = 1 - else: - space_left = width - cur_len - - # If we're allowed to break long words, then do so: put as much - # of the next chunk onto the current line as will fit. - if self.break_long_words: - cur_line.append(reversed_chunks[-1][:space_left]) - reversed_chunks[-1] = reversed_chunks[-1][space_left:] - - # Otherwise, we have to preserve the long word intact. Only add - # it to the current line if there's nothing already there -- - # that minimizes how much we violate the width constraint. - elif not cur_line: - cur_line.append(reversed_chunks.pop()) - - # If we're not allowed to break long words, and there's already - # text on the current line, do nothing. Next time through the - # main loop of _wrap_chunks(), we'll wind up here again, but - # cur_len will be zero, so the next line will be entirely - # devoted to the long word that we can't handle right now. - - def _wrap_chunks(self, chunks): - """_wrap_chunks(chunks : [string]) -> [string] - - Wrap a sequence of text chunks and return a list of lines of - length 'self.width' or less. (If 'break_long_words' is false, - some lines may be longer than this.) Chunks correspond roughly - to words and the whitespace between them: each chunk is - indivisible (modulo 'break_long_words'), but a line break can - come between any two chunks. Chunks should not have internal - whitespace; ie. a chunk is either all whitespace or a "word". - Whitespace chunks will be removed from the beginning and end of - lines, but apart from that whitespace is preserved. - """ - lines = [] - if self.width <= 0: - raise ValueError("invalid width %r (must be > 0)" % self.width) - - # Arrange in reverse order so items can be efficiently popped - # from a stack of chucks. - chunks.reverse() - - while chunks: - - # Start the list of chunks that will make up the current line. 
- # cur_len is just the length of all the chunks in cur_line. - cur_line = [] - cur_len = 0 - - # Figure out which static string will prefix this line. - if lines: - indent = self.subsequent_indent - else: - indent = self.initial_indent - - # Maximum width for this line. - width = self.width - len(indent) - - # First chunk on line is whitespace -- drop it, unless this - # is the very beginning of the text (ie. no lines started yet). - if self.drop_whitespace and chunks[-1].strip() == '' and lines: - del chunks[-1] - - while chunks: - l = len(chunks[-1]) - - # Can at least squeeze this chunk onto the current line. - if cur_len + l <= width: - cur_line.append(chunks.pop()) - cur_len += l - - # Nope, this line is full. - else: - break - - # The current line is full, and the next chunk is too big to - # fit on *any* line (not just this one). - if chunks and len(chunks[-1]) > width: - self._handle_long_word(chunks, cur_line, cur_len, width) - - # If the last chunk on this line is all whitespace, drop it. - if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': - del cur_line[-1] - - # Convert current line back to a string and store it in list - # of all lines (return value). - if cur_line: - lines.append(indent + ''.join(cur_line)) - - return lines - - - # -- Public interface ---------------------------------------------- - - def wrap(self, text): - """wrap(text : string) -> [string] - - Reformat the single paragraph in 'text' so it fits in lines of - no more than 'self.width' columns, and return a list of wrapped - lines. Tabs in 'text' are expanded with string.expandtabs(), - and all other whitespace characters (including newline) are - converted to space. 
- """ - text = self._munge_whitespace(text) - chunks = self._split(text) - if self.fix_sentence_endings: - self._fix_sentence_endings(chunks) - return self._wrap_chunks(chunks) - - def fill(self, text): - """fill(text : string) -> string - - Reformat the single paragraph in 'text' to fit in lines of no - more than 'self.width' columns, and return a new string - containing the entire wrapped paragraph. - """ - return "\n".join(self.wrap(text)) - - -# -- Convenience interface --------------------------------------------- - -def wrap(text, width=70, **kwargs): - """Wrap a single paragraph of text, returning a list of wrapped lines. - - Reformat the single paragraph in 'text' so it fits in lines of no - more than 'width' columns, and return a list of wrapped lines. By - default, tabs in 'text' are expanded with string.expandtabs(), and - all other whitespace characters (including newline) are converted to - space. See TextWrapper class for available keyword args to customize - wrapping behaviour. - """ - w = TextWrapper(width=width, **kwargs) - return w.wrap(text) - -def fill(text, width=70, **kwargs): - """Fill a single paragraph of text, returning a new string. - - Reformat the single paragraph in 'text' to fit in lines of no more - than 'width' columns, and return a new string containing the entire - wrapped paragraph. As with wrap(), tabs are expanded and other - whitespace characters converted to space. See TextWrapper class for - available keyword args to customize wrapping behaviour. - """ - w = TextWrapper(width=width, **kwargs) - return w.fill(text) - - -# -- Loosely related functionality ------------------------------------- - -_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) -_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) - -def dedent(text): - """Remove any common leading whitespace from every line in `text`. 
- - This can be used to make triple-quoted strings line up with the left - edge of the display, while still presenting them in the source code - in indented form. - - Note that tabs and spaces are both treated as whitespace, but they - are not equal: the lines " hello" and "\\thello" are - considered to have no common leading whitespace. (This behaviour is - new in Python 2.5; older versions of this module incorrectly - expanded tabs before searching for common leading whitespace.) - """ - # Look for the longest leading string of spaces and tabs common to - # all lines. - margin = None - text = _whitespace_only_re.sub('', text) - indents = _leading_whitespace_re.findall(text) - for indent in indents: - if margin is None: - margin = indent - - # Current line more deeply indented than previous winner: - # no change (previous winner is still on top). - elif indent.startswith(margin): - pass - - # Current line consistent with and no deeper than previous winner: - # it's the new winner. - elif margin.startswith(indent): - margin = indent - - # Find the largest common whitespace between current line and previous - # winner. - else: - for i, (x, y) in enumerate(zip(margin, indent)): - if x != y: - margin = margin[:i] - break - else: - margin = margin[:len(indent)] - - # sanity check (testing/debugging only) - if 0 and margin: - for line in text.split("\n"): - assert not line or line.startswith(margin), \ - "line = %r, margin = %r" % (line, margin) - - if margin: - text = re.sub(r'(?m)^' + margin, '', text) - return text - -if __name__ == "__main__": - #print dedent("\tfoo\n\tbar") - #print dedent(" \thello there\n \t how are you?") - print dedent("Hello there.\n This is indented.") diff --git a/python/Lib/this.py b/python/Lib/this.py deleted file mode 100755 index 37754b785a..0000000000 --- a/python/Lib/this.py +++ /dev/null @@ -1,28 +0,0 @@ -s = """Gur Mra bs Clguba, ol Gvz Crgref - -Ornhgvshy vf orggre guna htyl. -Rkcyvpvg vf orggre guna vzcyvpvg. 
-Fvzcyr vf orggre guna pbzcyrk. -Pbzcyrk vf orggre guna pbzcyvpngrq. -Syng vf orggre guna arfgrq. -Fcnefr vf orggre guna qrafr. -Ernqnovyvgl pbhagf. -Fcrpvny pnfrf nera'g fcrpvny rabhtu gb oernx gur ehyrf. -Nygubhtu cenpgvpnyvgl orngf chevgl. -Reebef fubhyq arire cnff fvyragyl. -Hayrff rkcyvpvgyl fvyraprq. -Va gur snpr bs nzovthvgl, ershfr gur grzcgngvba gb thrff. -Gurer fubhyq or bar-- naq cersrenoyl bayl bar --boivbhf jnl gb qb vg. -Nygubhtu gung jnl znl abg or boivbhf ng svefg hayrff lbh'er Qhgpu. -Abj vf orggre guna arire. -Nygubhtu arire vf bsgra orggre guna *evtug* abj. -Vs gur vzcyrzragngvba vf uneq gb rkcynva, vg'f n onq vqrn. -Vs gur vzcyrzragngvba vf rnfl gb rkcynva, vg znl or n tbbq vqrn. -Anzrfcnprf ner bar ubaxvat terng vqrn -- yrg'f qb zber bs gubfr!""" - -d = {} -for c in (65, 97): - for i in range(26): - d[chr(i+c)] = chr((i+13) % 26 + c) - -print "".join([d.get(c, c) for c in s]) diff --git a/python/Lib/threading.py b/python/Lib/threading.py deleted file mode 100755 index 527f20acc6..0000000000 --- a/python/Lib/threading.py +++ /dev/null @@ -1,1322 +0,0 @@ -"""Thread module emulating a subset of Java's threading model.""" - -import sys as _sys - -try: - import thread -except ImportError: - del _sys.modules[__name__] - raise - -import warnings - -from collections import deque as _deque -from itertools import count as _count -from time import time as _time, sleep as _sleep -from traceback import format_exc as _format_exc - -# Note regarding PEP 8 compliant aliases -# This threading model was originally inspired by Java, and inherited -# the convention of camelCase function and method names from that -# language. While those names are not in any imminent danger of being -# deprecated, starting with Python 2.6, the module now provides a -# PEP 8 compliant alias for any such method name. -# Using the new PEP 8 compliant names also facilitates substitution -# with the multiprocessing module, which doesn't provide the old -# Java inspired names. 
- - -# Rename some stuff so "from threading import *" is safe -__all__ = ['activeCount', 'active_count', 'Condition', 'currentThread', - 'current_thread', 'enumerate', 'Event', - 'Lock', 'RLock', 'Semaphore', 'BoundedSemaphore', 'Thread', - 'Timer', 'setprofile', 'settrace', 'local', 'stack_size'] - -_start_new_thread = thread.start_new_thread -_allocate_lock = thread.allocate_lock -_get_ident = thread.get_ident -ThreadError = thread.error -del thread - - -# sys.exc_clear is used to work around the fact that except blocks -# don't fully clear the exception until 3.0. -warnings.filterwarnings('ignore', category=DeprecationWarning, - module='threading', message='sys.exc_clear') - -# Debug support (adapted from ihooks.py). -# All the major classes here derive from _Verbose. We force that to -# be a new-style class so that all the major classes here are new-style. -# This helps debugging (type(instance) is more revealing for instances -# of new-style classes). - -_VERBOSE = False - -if __debug__: - - class _Verbose(object): - - def __init__(self, verbose=None): - if verbose is None: - verbose = _VERBOSE - self.__verbose = verbose - - def _note(self, format, *args): - if self.__verbose: - format = format % args - # Issue #4188: calling current_thread() can incur an infinite - # recursion if it has to create a DummyThread on the fly. - ident = _get_ident() - try: - name = _active[ident].name - except KeyError: - name = "" % ident - format = "%s: %s\n" % (name, format) - _sys.stderr.write(format) - -else: - # Disable this when using "python -O" - class _Verbose(object): - def __init__(self, verbose=None): - pass - def _note(self, *args): - pass - -# Support for profile and trace hooks - -_profile_hook = None -_trace_hook = None - -def setprofile(func): - """Set a profile function for all threads started from the threading module. - - The func will be passed to sys.setprofile() for each thread, before its - run() method is called. 
- - """ - global _profile_hook - _profile_hook = func - -def settrace(func): - """Set a trace function for all threads started from the threading module. - - The func will be passed to sys.settrace() for each thread, before its run() - method is called. - - """ - global _trace_hook - _trace_hook = func - -# Synchronization classes - -Lock = _allocate_lock - -def RLock(*args, **kwargs): - """Factory function that returns a new reentrant lock. - - A reentrant lock must be released by the thread that acquired it. Once a - thread has acquired a reentrant lock, the same thread may acquire it again - without blocking; the thread must release it once for each time it has - acquired it. - - """ - return _RLock(*args, **kwargs) - -class _RLock(_Verbose): - """A reentrant lock must be released by the thread that acquired it. Once a - thread has acquired a reentrant lock, the same thread may acquire it - again without blocking; the thread must release it once for each time it - has acquired it. - """ - - def __init__(self, verbose=None): - _Verbose.__init__(self, verbose) - self.__block = _allocate_lock() - self.__owner = None - self.__count = 0 - - def __repr__(self): - owner = self.__owner - try: - owner = _active[owner].name - except KeyError: - pass - return "<%s owner=%r count=%d>" % ( - self.__class__.__name__, owner, self.__count) - - def acquire(self, blocking=1): - """Acquire a lock, blocking or non-blocking. - - When invoked without arguments: if this thread already owns the lock, - increment the recursion level by one, and return immediately. Otherwise, - if another thread owns the lock, block until the lock is unlocked. Once - the lock is unlocked (not owned by any thread), then grab ownership, set - the recursion level to one, and return. If more than one thread is - blocked waiting until the lock is unlocked, only one at a time will be - able to grab ownership of the lock. There is no return value in this - case. 
- - When invoked with the blocking argument set to true, do the same thing - as when called without arguments, and return true. - - When invoked with the blocking argument set to false, do not block. If a - call without an argument would block, return false immediately; - otherwise, do the same thing as when called without arguments, and - return true. - - """ - me = _get_ident() - if self.__owner == me: - self.__count = self.__count + 1 - if __debug__: - self._note("%s.acquire(%s): recursive success", self, blocking) - return 1 - rc = self.__block.acquire(blocking) - if rc: - self.__owner = me - self.__count = 1 - if __debug__: - self._note("%s.acquire(%s): initial success", self, blocking) - else: - if __debug__: - self._note("%s.acquire(%s): failure", self, blocking) - return rc - - __enter__ = acquire - - def release(self): - """Release a lock, decrementing the recursion level. - - If after the decrement it is zero, reset the lock to unlocked (not owned - by any thread), and if any other threads are blocked waiting for the - lock to become unlocked, allow exactly one of them to proceed. If after - the decrement the recursion level is still nonzero, the lock remains - locked and owned by the calling thread. - - Only call this method when the calling thread owns the lock. A - RuntimeError is raised if this method is called when the lock is - unlocked. - - There is no return value. 
- - """ - if self.__owner != _get_ident(): - raise RuntimeError("cannot release un-acquired lock") - self.__count = count = self.__count - 1 - if not count: - self.__owner = None - self.__block.release() - if __debug__: - self._note("%s.release(): final release", self) - else: - if __debug__: - self._note("%s.release(): non-final release", self) - - def __exit__(self, t, v, tb): - self.release() - - # Internal methods used by condition variables - - def _acquire_restore(self, count_owner): - count, owner = count_owner - self.__block.acquire() - self.__count = count - self.__owner = owner - if __debug__: - self._note("%s._acquire_restore()", self) - - def _release_save(self): - if __debug__: - self._note("%s._release_save()", self) - count = self.__count - self.__count = 0 - owner = self.__owner - self.__owner = None - self.__block.release() - return (count, owner) - - def _is_owned(self): - return self.__owner == _get_ident() - - -def Condition(*args, **kwargs): - """Factory function that returns a new condition variable object. - - A condition variable allows one or more threads to wait until they are - notified by another thread. - - If the lock argument is given and not None, it must be a Lock or RLock - object, and it is used as the underlying lock. Otherwise, a new RLock object - is created and used as the underlying lock. - - """ - return _Condition(*args, **kwargs) - -class _Condition(_Verbose): - """Condition variables allow one or more threads to wait until they are - notified by another thread. - """ - - def __init__(self, lock=None, verbose=None): - _Verbose.__init__(self, verbose) - if lock is None: - lock = RLock() - self.__lock = lock - # Export the lock's acquire() and release() methods - self.acquire = lock.acquire - self.release = lock.release - # If the lock defines _release_save() and/or _acquire_restore(), - # these override the default implementations (which just call - # release() and acquire() on the lock). Ditto for _is_owned(). 
- try: - self._release_save = lock._release_save - except AttributeError: - pass - try: - self._acquire_restore = lock._acquire_restore - except AttributeError: - pass - try: - self._is_owned = lock._is_owned - except AttributeError: - pass - self.__waiters = [] - - def __enter__(self): - return self.__lock.__enter__() - - def __exit__(self, *args): - return self.__lock.__exit__(*args) - - def __repr__(self): - return "" % (self.__lock, len(self.__waiters)) - - def _release_save(self): - self.__lock.release() # No state to save - - def _acquire_restore(self, x): - self.__lock.acquire() # Ignore saved state - - def _is_owned(self): - # Return True if lock is owned by current_thread. - # This method is called only if __lock doesn't have _is_owned(). - if self.__lock.acquire(0): - self.__lock.release() - return False - else: - return True - - def wait(self, timeout=None): - """Wait until notified or until a timeout occurs. - - If the calling thread has not acquired the lock when this method is - called, a RuntimeError is raised. - - This method releases the underlying lock, and then blocks until it is - awakened by a notify() or notifyAll() call for the same condition - variable in another thread, or until the optional timeout occurs. Once - awakened or timed out, it re-acquires the lock and returns. - - When the timeout argument is present and not None, it should be a - floating point number specifying a timeout for the operation in seconds - (or fractions thereof). - - When the underlying lock is an RLock, it is not released using its - release() method, since this may not actually unlock the lock when it - was acquired multiple times recursively. Instead, an internal interface - of the RLock class is used, which really unlocks it even when it has - been recursively acquired several times. Another internal interface is - then used to restore the recursion level when the lock is reacquired. 
- - """ - if not self._is_owned(): - raise RuntimeError("cannot wait on un-acquired lock") - waiter = _allocate_lock() - waiter.acquire() - self.__waiters.append(waiter) - saved_state = self._release_save() - try: # restore state no matter what (e.g., KeyboardInterrupt) - if timeout is None: - waiter.acquire() - if __debug__: - self._note("%s.wait(): got it", self) - else: - # Balancing act: We can't afford a pure busy loop, so we - # have to sleep; but if we sleep the whole timeout time, - # we'll be unresponsive. The scheme here sleeps very - # little at first, longer as time goes on, but never longer - # than 20 times per second (or the timeout time remaining). - endtime = _time() + timeout - delay = 0.0005 # 500 us -> initial delay of 1 ms - while True: - gotit = waiter.acquire(0) - if gotit: - break - remaining = endtime - _time() - if remaining <= 0: - break - delay = min(delay * 2, remaining, .05) - _sleep(delay) - if not gotit: - if __debug__: - self._note("%s.wait(%s): timed out", self, timeout) - try: - self.__waiters.remove(waiter) - except ValueError: - pass - else: - if __debug__: - self._note("%s.wait(%s): got it", self, timeout) - finally: - self._acquire_restore(saved_state) - - def notify(self, n=1): - """Wake up one or more threads waiting on this condition, if any. - - If the calling thread has not acquired the lock when this method is - called, a RuntimeError is raised. - - This method wakes up at most n of the threads waiting for the condition - variable; it is a no-op if no threads are waiting. 
- - """ - if not self._is_owned(): - raise RuntimeError("cannot notify on un-acquired lock") - __waiters = self.__waiters - waiters = __waiters[:n] - if not waiters: - if __debug__: - self._note("%s.notify(): no waiters", self) - return - self._note("%s.notify(): notifying %d waiter%s", self, n, - n!=1 and "s" or "") - for waiter in waiters: - waiter.release() - try: - __waiters.remove(waiter) - except ValueError: - pass - - def notifyAll(self): - """Wake up all threads waiting on this condition. - - If the calling thread has not acquired the lock when this method - is called, a RuntimeError is raised. - - """ - self.notify(len(self.__waiters)) - - notify_all = notifyAll - - -def Semaphore(*args, **kwargs): - """A factory function that returns a new semaphore. - - Semaphores manage a counter representing the number of release() calls minus - the number of acquire() calls, plus an initial value. The acquire() method - blocks if necessary until it can return without making the counter - negative. If not given, value defaults to 1. - - """ - return _Semaphore(*args, **kwargs) - -class _Semaphore(_Verbose): - """Semaphores manage a counter representing the number of release() calls - minus the number of acquire() calls, plus an initial value. The acquire() - method blocks if necessary until it can return without making the counter - negative. If not given, value defaults to 1. - - """ - - # After Tim Peters' semaphore class, but not quite the same (no maximum) - - def __init__(self, value=1, verbose=None): - if value < 0: - raise ValueError("semaphore initial value must be >= 0") - _Verbose.__init__(self, verbose) - self.__cond = Condition(Lock()) - self.__value = value - - def acquire(self, blocking=1): - """Acquire a semaphore, decrementing the internal counter by one. - - When invoked without arguments: if the internal counter is larger than - zero on entry, decrement it by one and return immediately. 
If it is zero - on entry, block, waiting until some other thread has called release() to - make it larger than zero. This is done with proper interlocking so that - if multiple acquire() calls are blocked, release() will wake exactly one - of them up. The implementation may pick one at random, so the order in - which blocked threads are awakened should not be relied on. There is no - return value in this case. - - When invoked with blocking set to true, do the same thing as when called - without arguments, and return true. - - When invoked with blocking set to false, do not block. If a call without - an argument would block, return false immediately; otherwise, do the - same thing as when called without arguments, and return true. - - """ - rc = False - with self.__cond: - while self.__value == 0: - if not blocking: - break - if __debug__: - self._note("%s.acquire(%s): blocked waiting, value=%s", - self, blocking, self.__value) - self.__cond.wait() - else: - self.__value = self.__value - 1 - if __debug__: - self._note("%s.acquire: success, value=%s", - self, self.__value) - rc = True - return rc - - __enter__ = acquire - - def release(self): - """Release a semaphore, incrementing the internal counter by one. - - When the counter is zero on entry and another thread is waiting for it - to become larger than zero again, wake up that thread. - - """ - with self.__cond: - self.__value = self.__value + 1 - if __debug__: - self._note("%s.release: success, value=%s", - self, self.__value) - self.__cond.notify() - - def __exit__(self, t, v, tb): - self.release() - - -def BoundedSemaphore(*args, **kwargs): - """A factory function that returns a new bounded semaphore. - - A bounded semaphore checks to make sure its current value doesn't exceed its - initial value. If it does, ValueError is raised. In most situations - semaphores are used to guard resources with limited capacity. - - If the semaphore is released too many times it's a sign of a bug. 
If not - given, value defaults to 1. - - Like regular semaphores, bounded semaphores manage a counter representing - the number of release() calls minus the number of acquire() calls, plus an - initial value. The acquire() method blocks if necessary until it can return - without making the counter negative. If not given, value defaults to 1. - - """ - return _BoundedSemaphore(*args, **kwargs) - -class _BoundedSemaphore(_Semaphore): - """A bounded semaphore checks to make sure its current value doesn't exceed - its initial value. If it does, ValueError is raised. In most situations - semaphores are used to guard resources with limited capacity. - """ - - def __init__(self, value=1, verbose=None): - _Semaphore.__init__(self, value, verbose) - self._initial_value = value - - def release(self): - """Release a semaphore, incrementing the internal counter by one. - - When the counter is zero on entry and another thread is waiting for it - to become larger than zero again, wake up that thread. - - If the number of releases exceeds the number of acquires, - raise a ValueError. - - """ - with self._Semaphore__cond: - if self._Semaphore__value >= self._initial_value: - raise ValueError("Semaphore released too many times") - self._Semaphore__value += 1 - self._Semaphore__cond.notify() - - -def Event(*args, **kwargs): - """A factory function that returns a new event. - - Events manage a flag that can be set to true with the set() method and reset - to false with the clear() method. The wait() method blocks until the flag is - true. - - """ - return _Event(*args, **kwargs) - -class _Event(_Verbose): - """A factory function that returns a new event object. An event manages a - flag that can be set to true with the set() method and reset to false - with the clear() method. The wait() method blocks until the flag is true. 
- - """ - - # After Tim Peters' event class (without is_posted()) - - def __init__(self, verbose=None): - _Verbose.__init__(self, verbose) - self.__cond = Condition(Lock()) - self.__flag = False - - def _reset_internal_locks(self): - # private! called by Thread._reset_internal_locks by _after_fork() - self.__cond.__init__(Lock()) - - def isSet(self): - 'Return true if and only if the internal flag is true.' - return self.__flag - - is_set = isSet - - def set(self): - """Set the internal flag to true. - - All threads waiting for the flag to become true are awakened. Threads - that call wait() once the flag is true will not block at all. - - """ - with self.__cond: - self.__flag = True - self.__cond.notify_all() - - def clear(self): - """Reset the internal flag to false. - - Subsequently, threads calling wait() will block until set() is called to - set the internal flag to true again. - - """ - with self.__cond: - self.__flag = False - - def wait(self, timeout=None): - """Block until the internal flag is true. - - If the internal flag is true on entry, return immediately. Otherwise, - block until another thread calls set() to set the flag to true, or until - the optional timeout occurs. - - When the timeout argument is present and not None, it should be a - floating point number specifying a timeout for the operation in seconds - (or fractions thereof). - - This method returns the internal flag on exit, so it will always return - True except if a timeout is given and the operation times out. - - """ - with self.__cond: - if not self.__flag: - self.__cond.wait(timeout) - return self.__flag - -# Helper to generate new thread names -_counter = _count().next -_counter() # Consume 0 so first non-main thread has id 1. 
-def _newname(template="Thread-%d"): - return template % _counter() - -# Active thread administration -_active_limbo_lock = _allocate_lock() -_active = {} # maps thread id to Thread object -_limbo = {} - - -# Main class for threads - -class Thread(_Verbose): - """A class that represents a thread of control. - - This class can be safely subclassed in a limited fashion. - - """ - __initialized = False - # Need to store a reference to sys.exc_info for printing - # out exceptions when a thread tries to use a global var. during interp. - # shutdown and thus raises an exception about trying to perform some - # operation on/with a NoneType - __exc_info = _sys.exc_info - # Keep sys.exc_clear too to clear the exception just before - # allowing .join() to return. - __exc_clear = _sys.exc_clear - - def __init__(self, group=None, target=None, name=None, - args=(), kwargs=None, verbose=None): - """This constructor should always be called with keyword arguments. Arguments are: - - *group* should be None; reserved for future extension when a ThreadGroup - class is implemented. - - *target* is the callable object to be invoked by the run() - method. Defaults to None, meaning nothing is called. - - *name* is the thread name. By default, a unique name is constructed of - the form "Thread-N" where N is a small decimal number. - - *args* is the argument tuple for the target invocation. Defaults to (). - - *kwargs* is a dictionary of keyword arguments for the target - invocation. Defaults to {}. - - If a subclass overrides the constructor, it must make sure to invoke - the base class constructor (Thread.__init__()) before doing anything - else to the thread. 
- -""" - assert group is None, "group argument must be None for now" - _Verbose.__init__(self, verbose) - if kwargs is None: - kwargs = {} - self.__target = target - self.__name = str(name or _newname()) - self.__args = args - self.__kwargs = kwargs - self.__daemonic = self._set_daemon() - self.__ident = None - self.__started = Event() - self.__stopped = False - self.__block = Condition(Lock()) - self.__initialized = True - # sys.stderr is not stored in the class like - # sys.exc_info since it can be changed between instances - self.__stderr = _sys.stderr - - def _reset_internal_locks(self): - # private! Called by _after_fork() to reset our internal locks as - # they may be in an invalid state leading to a deadlock or crash. - if hasattr(self, '_Thread__block'): # DummyThread deletes self.__block - self.__block.__init__() - self.__started._reset_internal_locks() - - @property - def _block(self): - # used by a unittest - return self.__block - - def _set_daemon(self): - # Overridden in _MainThread and _DummyThread - return current_thread().daemon - - def __repr__(self): - assert self.__initialized, "Thread.__init__() was not called" - status = "initial" - if self.__started.is_set(): - status = "started" - if self.__stopped: - status = "stopped" - if self.__daemonic: - status += " daemon" - if self.__ident is not None: - status += " %s" % self.__ident - return "<%s(%s, %s)>" % (self.__class__.__name__, self.__name, status) - - def start(self): - """Start the thread's activity. - - It must be called at most once per thread object. It arranges for the - object's run() method to be invoked in a separate thread of control. - - This method will raise a RuntimeError if called more than once on the - same thread object. 
- - """ - if not self.__initialized: - raise RuntimeError("thread.__init__() not called") - if self.__started.is_set(): - raise RuntimeError("threads can only be started once") - if __debug__: - self._note("%s.start(): starting thread", self) - with _active_limbo_lock: - _limbo[self] = self - try: - _start_new_thread(self.__bootstrap, ()) - except Exception: - with _active_limbo_lock: - del _limbo[self] - raise - self.__started.wait() - - def run(self): - """Method representing the thread's activity. - - You may override this method in a subclass. The standard run() method - invokes the callable object passed to the object's constructor as the - target argument, if any, with sequential and keyword arguments taken - from the args and kwargs arguments, respectively. - - """ - try: - if self.__target: - self.__target(*self.__args, **self.__kwargs) - finally: - # Avoid a refcycle if the thread is running a function with - # an argument that has a member that points to the thread. - del self.__target, self.__args, self.__kwargs - - def __bootstrap(self): - # Wrapper around the real bootstrap code that ignores - # exceptions during interpreter cleanup. Those typically - # happen when a daemon thread wakes up at an unfortunate - # moment, finds the world around it destroyed, and raises some - # random exception *** while trying to report the exception in - # __bootstrap_inner() below ***. Those random exceptions - # don't help anybody, and they confuse users, so we suppress - # them. We suppress them only when it appears that the world - # indeed has already been destroyed, so that exceptions in - # __bootstrap_inner() during normal business hours are properly - # reported. Also, we only suppress them for daemonic threads; - # if a non-daemonic encounters this, something else is wrong. 
- try: - self.__bootstrap_inner() - except: - if self.__daemonic and _sys is None: - return - raise - - def _set_ident(self): - self.__ident = _get_ident() - - def __bootstrap_inner(self): - try: - self._set_ident() - self.__started.set() - with _active_limbo_lock: - _active[self.__ident] = self - del _limbo[self] - if __debug__: - self._note("%s.__bootstrap(): thread started", self) - - if _trace_hook: - self._note("%s.__bootstrap(): registering trace hook", self) - _sys.settrace(_trace_hook) - if _profile_hook: - self._note("%s.__bootstrap(): registering profile hook", self) - _sys.setprofile(_profile_hook) - - try: - self.run() - except SystemExit: - if __debug__: - self._note("%s.__bootstrap(): raised SystemExit", self) - except: - if __debug__: - self._note("%s.__bootstrap(): unhandled exception", self) - # If sys.stderr is no more (most likely from interpreter - # shutdown) use self.__stderr. Otherwise still use sys (as in - # _sys) in case sys.stderr was redefined since the creation of - # self. - if _sys and _sys.stderr is not None: - print>>_sys.stderr, ("Exception in thread %s:\n%s" % - (self.name, _format_exc())) - elif self.__stderr is not None: - # Do the best job possible w/o a huge amt. 
of code to - # approximate a traceback (code ideas from - # Lib/traceback.py) - exc_type, exc_value, exc_tb = self.__exc_info() - try: - print>>self.__stderr, ( - "Exception in thread " + self.name + - " (most likely raised during interpreter shutdown):") - print>>self.__stderr, ( - "Traceback (most recent call last):") - while exc_tb: - print>>self.__stderr, ( - ' File "%s", line %s, in %s' % - (exc_tb.tb_frame.f_code.co_filename, - exc_tb.tb_lineno, - exc_tb.tb_frame.f_code.co_name)) - exc_tb = exc_tb.tb_next - print>>self.__stderr, ("%s: %s" % (exc_type, exc_value)) - # Make sure that exc_tb gets deleted since it is a memory - # hog; deleting everything else is just for thoroughness - finally: - del exc_type, exc_value, exc_tb - else: - if __debug__: - self._note("%s.__bootstrap(): normal return", self) - finally: - # Prevent a race in - # test_threading.test_no_refcycle_through_target when - # the exception keeps the target alive past when we - # assert that it's dead. - self.__exc_clear() - finally: - with _active_limbo_lock: - self.__stop() - try: - # We don't call self.__delete() because it also - # grabs _active_limbo_lock. - del _active[_get_ident()] - except: - pass - - def __stop(self): - # DummyThreads delete self.__block, but they have no waiters to - # notify anyway (join() is forbidden on them). - if not hasattr(self, '_Thread__block'): - return - self.__block.acquire() - self.__stopped = True - self.__block.notify_all() - self.__block.release() - - def __delete(self): - "Remove current thread from the dict of currently running threads." - - # Notes about running with dummy_thread: - # - # Must take care to not raise an exception if dummy_thread is being - # used (and thus this module is being used as an instance of - # dummy_threading). dummy_thread.get_ident() always returns -1 since - # there is only one thread if dummy_thread is being used. 
Thus - # len(_active) is always <= 1 here, and any Thread instance created - # overwrites the (if any) thread currently registered in _active. - # - # An instance of _MainThread is always created by 'threading'. This - # gets overwritten the instant an instance of Thread is created; both - # threads return -1 from dummy_thread.get_ident() and thus have the - # same key in the dict. So when the _MainThread instance created by - # 'threading' tries to clean itself up when atexit calls this method - # it gets a KeyError if another Thread instance was created. - # - # This all means that KeyError from trying to delete something from - # _active if dummy_threading is being used is a red herring. But - # since it isn't if dummy_threading is *not* being used then don't - # hide the exception. - - try: - with _active_limbo_lock: - del _active[_get_ident()] - # There must not be any python code between the previous line - # and after the lock is released. Otherwise a tracing function - # could try to acquire the lock again in the same thread, (in - # current_thread()), and would block. - except KeyError: - if 'dummy_threading' not in _sys.modules: - raise - - def join(self, timeout=None): - """Wait until the thread terminates. - - This blocks the calling thread until the thread whose join() method is - called terminates -- either normally or through an unhandled exception - or until the optional timeout occurs. - - When the timeout argument is present and not None, it should be a - floating point number specifying a timeout for the operation in seconds - (or fractions thereof). As join() always returns None, you must call - isAlive() after join() to decide whether a timeout happened -- if the - thread is still alive, the join() call timed out. - - When the timeout argument is not present or None, the operation will - block until the thread terminates. - - A thread can be join()ed many times. 
- - join() raises a RuntimeError if an attempt is made to join the current - thread as that would cause a deadlock. It is also an error to join() a - thread before it has been started and attempts to do so raises the same - exception. - - """ - if not self.__initialized: - raise RuntimeError("Thread.__init__() not called") - if not self.__started.is_set(): - raise RuntimeError("cannot join thread before it is started") - if self is current_thread(): - raise RuntimeError("cannot join current thread") - - if __debug__: - if not self.__stopped: - self._note("%s.join(): waiting until thread stops", self) - self.__block.acquire() - try: - if timeout is None: - while not self.__stopped: - self.__block.wait() - if __debug__: - self._note("%s.join(): thread stopped", self) - else: - deadline = _time() + timeout - while not self.__stopped: - delay = deadline - _time() - if delay <= 0: - if __debug__: - self._note("%s.join(): timed out", self) - break - self.__block.wait(delay) - else: - if __debug__: - self._note("%s.join(): thread stopped", self) - finally: - self.__block.release() - - @property - def name(self): - """A string used for identification purposes only. - - It has no semantics. Multiple threads may be given the same name. The - initial name is set by the constructor. - - """ - assert self.__initialized, "Thread.__init__() not called" - return self.__name - - @name.setter - def name(self, name): - assert self.__initialized, "Thread.__init__() not called" - self.__name = str(name) - - @property - def ident(self): - """Thread identifier of this thread or None if it has not been started. - - This is a nonzero integer. See the thread.get_ident() function. Thread - identifiers may be recycled when a thread exits and another thread is - created. The identifier is available even after the thread has exited. - - """ - assert self.__initialized, "Thread.__init__() not called" - return self.__ident - - def isAlive(self): - """Return whether the thread is alive. 
- - This method returns True just before the run() method starts until just - after the run() method terminates. The module function enumerate() - returns a list of all alive threads. - - """ - assert self.__initialized, "Thread.__init__() not called" - return self.__started.is_set() and not self.__stopped - - is_alive = isAlive - - @property - def daemon(self): - """A boolean value indicating whether this thread is a daemon thread (True) or not (False). - - This must be set before start() is called, otherwise RuntimeError is - raised. Its initial value is inherited from the creating thread; the - main thread is not a daemon thread and therefore all threads created in - the main thread default to daemon = False. - - The entire Python program exits when no alive non-daemon threads are - left. - - """ - assert self.__initialized, "Thread.__init__() not called" - return self.__daemonic - - @daemon.setter - def daemon(self, daemonic): - if not self.__initialized: - raise RuntimeError("Thread.__init__() not called") - if self.__started.is_set(): - raise RuntimeError("cannot set daemon status of active thread"); - self.__daemonic = daemonic - - def isDaemon(self): - return self.daemon - - def setDaemon(self, daemonic): - self.daemon = daemonic - - def getName(self): - return self.name - - def setName(self, name): - self.name = name - -# The timer class was contributed by Itamar Shtull-Trauring - -def Timer(*args, **kwargs): - """Factory function to create a Timer object. 
- - Timers call a function after a specified number of seconds: - - t = Timer(30.0, f, args=[], kwargs={}) - t.start() - t.cancel() # stop the timer's action if it's still waiting - - """ - return _Timer(*args, **kwargs) - -class _Timer(Thread): - """Call a function after a specified number of seconds: - - t = Timer(30.0, f, args=[], kwargs={}) - t.start() - t.cancel() # stop the timer's action if it's still waiting - - """ - - def __init__(self, interval, function, args=[], kwargs={}): - Thread.__init__(self) - self.interval = interval - self.function = function - self.args = args - self.kwargs = kwargs - self.finished = Event() - - def cancel(self): - """Stop the timer if it hasn't finished yet""" - self.finished.set() - - def run(self): - self.finished.wait(self.interval) - if not self.finished.is_set(): - self.function(*self.args, **self.kwargs) - self.finished.set() - -# Special thread class to represent the main thread -# This is garbage collected through an exit handler - -class _MainThread(Thread): - - def __init__(self): - Thread.__init__(self, name="MainThread") - self._Thread__started.set() - self._set_ident() - with _active_limbo_lock: - _active[_get_ident()] = self - - def _set_daemon(self): - return False - - def _exitfunc(self): - self._Thread__stop() - t = _pickSomeNonDaemonThread() - if t: - if __debug__: - self._note("%s: waiting for other threads", self) - while t: - t.join() - t = _pickSomeNonDaemonThread() - if __debug__: - self._note("%s: exiting", self) - self._Thread__delete() - -def _pickSomeNonDaemonThread(): - for t in enumerate(): - if not t.daemon and t.is_alive(): - return t - return None - - -# Dummy thread class to represent threads not started here. -# These aren't garbage collected when they die, nor can they be waited for. -# If they invoke anything in threading.py that calls current_thread(), they -# leave an entry in the _active dict forever after. -# Their purpose is to return *something* from current_thread(). 
-# They are marked as daemon threads so we won't wait for them -# when we exit (conform previous semantics). - -class _DummyThread(Thread): - - def __init__(self): - Thread.__init__(self, name=_newname("Dummy-%d")) - - # Thread.__block consumes an OS-level locking primitive, which - # can never be used by a _DummyThread. Since a _DummyThread - # instance is immortal, that's bad, so release this resource. - del self._Thread__block - - self._Thread__started.set() - self._set_ident() - with _active_limbo_lock: - _active[_get_ident()] = self - - def _set_daemon(self): - return True - - def join(self, timeout=None): - assert False, "cannot join a dummy thread" - - -# Global API functions - -def currentThread(): - """Return the current Thread object, corresponding to the caller's thread of control. - - If the caller's thread of control was not created through the threading - module, a dummy thread object with limited functionality is returned. - - """ - try: - return _active[_get_ident()] - except KeyError: - ##print "current_thread(): no current thread for", _get_ident() - return _DummyThread() - -current_thread = currentThread - -def activeCount(): - """Return the number of Thread objects currently alive. - - The returned count is equal to the length of the list returned by - enumerate(). - - """ - with _active_limbo_lock: - return len(_active) + len(_limbo) - -active_count = activeCount - -def _enumerate(): - # Same as enumerate(), but without the lock. Internal use only. - return _active.values() + _limbo.values() - -def enumerate(): - """Return a list of all Thread objects currently alive. - - The list includes daemonic threads, dummy thread objects created by - current_thread(), and the main thread. It excludes terminated threads and - threads that have not yet been started. 
- - """ - with _active_limbo_lock: - return _active.values() + _limbo.values() - -from thread import stack_size - -# Create the main thread object, -# and make it available for the interpreter -# (Py_Main) as threading._shutdown. - -_shutdown = _MainThread()._exitfunc - -# get thread-local implementation, either from the thread -# module, or from the python fallback - -try: - from thread import _local as local -except ImportError: - from _threading_local import local - - -def _after_fork(): - # This function is called by Python/ceval.c:PyEval_ReInitThreads which - # is called from PyOS_AfterFork. Here we cleanup threading module state - # that should not exist after a fork. - - # Reset _active_limbo_lock, in case we forked while the lock was held - # by another (non-forked) thread. http://bugs.python.org/issue874900 - global _active_limbo_lock - _active_limbo_lock = _allocate_lock() - - # fork() only copied the current thread; clear references to others. - new_active = {} - current = current_thread() - with _active_limbo_lock: - for thread in _enumerate(): - # Any lock/condition variable may be currently locked or in an - # invalid state, so we reinitialize them. - if hasattr(thread, '_reset_internal_locks'): - thread._reset_internal_locks() - if thread is current: - # There is only one active thread. We reset the ident to - # its new value since it can have changed. - ident = _get_ident() - thread._Thread__ident = ident - new_active[ident] = thread - else: - # All the others are already stopped. 
- thread._Thread__stop() - - _limbo.clear() - _active.clear() - _active.update(new_active) - assert len(_active) == 1 - - -# Self-test code - -def _test(): - - class BoundedQueue(_Verbose): - - def __init__(self, limit): - _Verbose.__init__(self) - self.mon = RLock() - self.rc = Condition(self.mon) - self.wc = Condition(self.mon) - self.limit = limit - self.queue = _deque() - - def put(self, item): - self.mon.acquire() - while len(self.queue) >= self.limit: - self._note("put(%s): queue full", item) - self.wc.wait() - self.queue.append(item) - self._note("put(%s): appended, length now %d", - item, len(self.queue)) - self.rc.notify() - self.mon.release() - - def get(self): - self.mon.acquire() - while not self.queue: - self._note("get(): queue empty") - self.rc.wait() - item = self.queue.popleft() - self._note("get(): got %s, %d left", item, len(self.queue)) - self.wc.notify() - self.mon.release() - return item - - class ProducerThread(Thread): - - def __init__(self, queue, quota): - Thread.__init__(self, name="Producer") - self.queue = queue - self.quota = quota - - def run(self): - from random import random - counter = 0 - while counter < self.quota: - counter = counter + 1 - self.queue.put("%s.%d" % (self.name, counter)) - _sleep(random() * 0.00001) - - - class ConsumerThread(Thread): - - def __init__(self, queue, count): - Thread.__init__(self, name="Consumer") - self.queue = queue - self.count = count - - def run(self): - while self.count > 0: - item = self.queue.get() - print item - self.count = self.count - 1 - - NP = 3 - QL = 4 - NI = 5 - - Q = BoundedQueue(QL) - P = [] - for i in range(NP): - t = ProducerThread(Q, NI) - t.name = ("Producer-%d" % (i+1)) - P.append(t) - C = ConsumerThread(Q, NI*NP) - for t in P: - t.start() - _sleep(0.000001) - C.start() - for t in P: - t.join() - C.join() - -if __name__ == '__main__': - _test() diff --git a/python/Lib/timeit.py b/python/Lib/timeit.py deleted file mode 100755 index bf0301e663..0000000000 --- 
a/python/Lib/timeit.py +++ /dev/null @@ -1,343 +0,0 @@ -#! /usr/bin/env python - -"""Tool for measuring execution time of small code snippets. - -This module avoids a number of common traps for measuring execution -times. See also Tim Peters' introduction to the Algorithms chapter in -the Python Cookbook, published by O'Reilly. - -Library usage: see the Timer class. - -Command line usage: - python timeit.py [-n N] [-r N] [-s S] [-t] [-c] [-h] [--] [statement] - -Options: - -n/--number N: how many times to execute 'statement' (default: see below) - -r/--repeat N: how many times to repeat the timer (default 3) - -s/--setup S: statement to be executed once initially (default 'pass') - -t/--time: use time.time() (default on Unix) - -c/--clock: use time.clock() (default on Windows) - -v/--verbose: print raw timing results; repeat for more digits precision - -h/--help: print this usage message and exit - --: separate options from statement, use when statement starts with - - statement: statement to be timed (default 'pass') - -A multi-line statement may be given by specifying each line as a -separate argument; indented lines are possible by enclosing an -argument in quotes and using leading spaces. Multiple -s options are -treated similarly. - -If -n is not given, a suitable number of loops is calculated by trying -successive powers of 10 until the total time is at least 0.2 seconds. - -The difference in default timer function is because on Windows, -clock() has microsecond granularity but time()'s granularity is 1/60th -of a second; on Unix, clock() has 1/100th of a second granularity and -time() is much more precise. On either platform, the default timer -functions measure wall clock time, not the CPU time. This means that -other processes running on the same computer may interfere with the -timing. The best thing to do when accurate timing is necessary is to -repeat the timing a few times and use the best time. 
The -r option is -good for this; the default of 3 repetitions is probably enough in most -cases. On Unix, you can use clock() to measure CPU time. - -Note: there is a certain baseline overhead associated with executing a -pass statement. The code here doesn't try to hide it, but you should -be aware of it. The baseline overhead can be measured by invoking the -program without arguments. - -The baseline overhead differs between Python versions! Also, to -fairly compare older Python versions to Python 2.3, you may want to -use python -O for the older versions to avoid timing SET_LINENO -instructions. -""" - -import gc -import sys -import time -try: - import itertools -except ImportError: - # Must be an older Python version (see timeit() below) - itertools = None - -__all__ = ["Timer"] - -dummy_src_name = "" -default_number = 1000000 -default_repeat = 3 - -if sys.platform == "win32": - # On Windows, the best timer is time.clock() - default_timer = time.clock -else: - # On most other platforms the best timer is time.time() - default_timer = time.time - -# Don't change the indentation of the template; the reindent() calls -# in Timer.__init__() depend on setup being indented 4 spaces and stmt -# being indented 8 spaces. -template = """ -def inner(_it, _timer%(init)s): - %(setup)s - _t0 = _timer() - for _i in _it: - %(stmt)s - _t1 = _timer() - return _t1 - _t0 -""" - -def reindent(src, indent): - """Helper to reindent a multi-line statement.""" - return src.replace("\n", "\n" + " "*indent) - -def _template_func(setup, func): - """Create a timer function. Used if the "statement" is a callable.""" - def inner(_it, _timer, _func=func): - setup() - _t0 = _timer() - for _i in _it: - _func() - _t1 = _timer() - return _t1 - _t0 - return inner - -class Timer: - """Class for timing execution speed of small code snippets. - - The constructor takes a statement to be timed, an additional - statement used for setup, and a timer function. 
Both statements - default to 'pass'; the timer function is platform-dependent (see - module doc string). - - To measure the execution time of the first statement, use the - timeit() method. The repeat() method is a convenience to call - timeit() multiple times and return a list of results. - - The statements may contain newlines, as long as they don't contain - multi-line string literals. - """ - - def __init__(self, stmt="pass", setup="pass", timer=default_timer): - """Constructor. See class doc string.""" - self.timer = timer - ns = {} - if isinstance(stmt, basestring): - # Check that the code can be compiled outside a function - if isinstance(setup, basestring): - compile(setup, dummy_src_name, "exec") - compile(setup + '\n' + stmt, dummy_src_name, "exec") - else: - compile(stmt, dummy_src_name, "exec") - stmt = reindent(stmt, 8) - if isinstance(setup, basestring): - setup = reindent(setup, 4) - src = template % {'stmt': stmt, 'setup': setup, 'init': ''} - elif hasattr(setup, '__call__'): - src = template % {'stmt': stmt, 'setup': '_setup()', - 'init': ', _setup=_setup'} - ns['_setup'] = setup - else: - raise ValueError("setup is neither a string nor callable") - self.src = src # Save for traceback display - code = compile(src, dummy_src_name, "exec") - exec code in globals(), ns - self.inner = ns["inner"] - elif hasattr(stmt, '__call__'): - self.src = None - if isinstance(setup, basestring): - _setup = setup - def setup(): - exec _setup in globals(), ns - elif not hasattr(setup, '__call__'): - raise ValueError("setup is neither a string nor callable") - self.inner = _template_func(setup, stmt) - else: - raise ValueError("stmt is neither a string nor callable") - - def print_exc(self, file=None): - """Helper to print a traceback from the timed code. - - Typical use: - - t = Timer(...) # outside the try/except - try: - t.timeit(...) # or t.repeat(...) 
- except: - t.print_exc() - - The advantage over the standard traceback is that source lines - in the compiled template will be displayed. - - The optional file argument directs where the traceback is - sent; it defaults to sys.stderr. - """ - import linecache, traceback - if self.src is not None: - linecache.cache[dummy_src_name] = (len(self.src), - None, - self.src.split("\n"), - dummy_src_name) - # else the source is already stored somewhere else - - traceback.print_exc(file=file) - - def timeit(self, number=default_number): - """Time 'number' executions of the main statement. - - To be precise, this executes the setup statement once, and - then returns the time it takes to execute the main statement - a number of times, as a float measured in seconds. The - argument is the number of times through the loop, defaulting - to one million. The main statement, the setup statement and - the timer function to be used are passed to the constructor. - """ - if itertools: - it = itertools.repeat(None, number) - else: - it = [None] * number - gcold = gc.isenabled() - gc.disable() - try: - timing = self.inner(it, self.timer) - finally: - if gcold: - gc.enable() - return timing - - def repeat(self, repeat=default_repeat, number=default_number): - """Call timeit() a few times. - - This is a convenience function that calls the timeit() - repeatedly, returning a list of results. The first argument - specifies how many times to call timeit(), defaulting to 3; - the second argument specifies the timer argument, defaulting - to one million. - - Note: it's tempting to calculate mean and standard deviation - from the result vector and report these. However, this is not - very useful. In a typical case, the lowest value gives a - lower bound for how fast your machine can run the given code - snippet; higher values in the result vector are typically not - caused by variability in Python's speed, but by other - processes interfering with your timing accuracy. 
So the min() - of the result is probably the only number you should be - interested in. After that, you should look at the entire - vector and apply common sense rather than statistics. - """ - r = [] - for i in range(repeat): - t = self.timeit(number) - r.append(t) - return r - -def timeit(stmt="pass", setup="pass", timer=default_timer, - number=default_number): - """Convenience function to create Timer object and call timeit method.""" - return Timer(stmt, setup, timer).timeit(number) - -def repeat(stmt="pass", setup="pass", timer=default_timer, - repeat=default_repeat, number=default_number): - """Convenience function to create Timer object and call repeat method.""" - return Timer(stmt, setup, timer).repeat(repeat, number) - -def main(args=None, _wrap_timer=None): - """Main program, used when run as a script. - - The optional 'args' argument specifies the command line to be parsed, - defaulting to sys.argv[1:]. - - The return value is an exit code to be passed to sys.exit(); it - may be None to indicate success. - - When an exception happens during timing, a traceback is printed to - stderr and the return value is 1. Exceptions at other times - (including the template compilation) are not caught. - - '_wrap_timer' is an internal interface used for unit testing. If it - is not None, it must be a callable that accepts a timer function - and returns another timer function (used for unit testing). 
- """ - if args is None: - args = sys.argv[1:] - import getopt - try: - opts, args = getopt.getopt(args, "n:s:r:tcvh", - ["number=", "setup=", "repeat=", - "time", "clock", "verbose", "help"]) - except getopt.error, err: - print err - print "use -h/--help for command line help" - return 2 - timer = default_timer - stmt = "\n".join(args) or "pass" - number = 0 # auto-determine - setup = [] - repeat = default_repeat - verbose = 0 - precision = 3 - for o, a in opts: - if o in ("-n", "--number"): - number = int(a) - if o in ("-s", "--setup"): - setup.append(a) - if o in ("-r", "--repeat"): - repeat = int(a) - if repeat <= 0: - repeat = 1 - if o in ("-t", "--time"): - timer = time.time - if o in ("-c", "--clock"): - timer = time.clock - if o in ("-v", "--verbose"): - if verbose: - precision += 1 - verbose += 1 - if o in ("-h", "--help"): - print __doc__, - return 0 - setup = "\n".join(setup) or "pass" - # Include the current directory, so that local imports work (sys.path - # contains the directory of this script, rather than the current - # directory) - import os - sys.path.insert(0, os.curdir) - if _wrap_timer is not None: - timer = _wrap_timer(timer) - t = Timer(stmt, setup, timer) - if number == 0: - # determine number so that 0.2 <= total time < 2.0 - for i in range(1, 10): - number = 10**i - try: - x = t.timeit(number) - except: - t.print_exc() - return 1 - if verbose: - print "%d loops -> %.*g secs" % (number, precision, x) - if x >= 0.2: - break - try: - r = t.repeat(repeat, number) - except: - t.print_exc() - return 1 - best = min(r) - if verbose: - print "raw times:", " ".join(["%.*g" % (precision, x) for x in r]) - print "%d loops," % number, - usec = best * 1e6 / number - if usec < 1000: - print "best of %d: %.*g usec per loop" % (repeat, precision, usec) - else: - msec = usec / 1000 - if msec < 1000: - print "best of %d: %.*g msec per loop" % (repeat, precision, msec) - else: - sec = msec / 1000 - print "best of %d: %.*g sec per loop" % (repeat, precision, 
sec) - return None - -if __name__ == "__main__": - sys.exit(main()) diff --git a/python/Lib/toaiff.py b/python/Lib/toaiff.py deleted file mode 100755 index 3a2b80cbe3..0000000000 --- a/python/Lib/toaiff.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Convert "arbitrary" sound files to AIFF (Apple and SGI's audio format). - -Input may be compressed. -Uncompressed file type may be AIFF, WAV, VOC, 8SVX, NeXT/Sun, and others. -An exception is raised if the file is not of a recognized type. -Returned filename is either the input filename or a temporary filename; -in the latter case the caller must ensure that it is removed. -Other temporary files used are removed by the function. -""" -from warnings import warnpy3k -warnpy3k("the toaiff module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -import os -import tempfile -import pipes -import sndhdr - -__all__ = ["error", "toaiff"] - -table = {} - -t = pipes.Template() -t.append('sox -t au - -t aiff -r 8000 -', '--') -table['au'] = t - -# XXX The following is actually sub-optimal. -# XXX The HCOM sampling rate can be 22k, 22k/2, 22k/3 or 22k/4. -# XXX We must force the output sampling rate else the SGI won't play -# XXX files sampled at 5.5k or 7.333k; however this means that files -# XXX sampled at 11k are unnecessarily expanded. -# XXX Similar comments apply to some other file types. 
-t = pipes.Template() -t.append('sox -t hcom - -t aiff -r 22050 -', '--') -table['hcom'] = t - -t = pipes.Template() -t.append('sox -t voc - -t aiff -r 11025 -', '--') -table['voc'] = t - -t = pipes.Template() -t.append('sox -t wav - -t aiff -', '--') -table['wav'] = t - -t = pipes.Template() -t.append('sox -t 8svx - -t aiff -r 16000 -', '--') -table['8svx'] = t - -t = pipes.Template() -t.append('sox -t sndt - -t aiff -r 16000 -', '--') -table['sndt'] = t - -t = pipes.Template() -t.append('sox -t sndr - -t aiff -r 16000 -', '--') -table['sndr'] = t - -uncompress = pipes.Template() -uncompress.append('uncompress', '--') - - -class error(Exception): - pass - -def toaiff(filename): - temps = [] - ret = None - try: - ret = _toaiff(filename, temps) - finally: - for temp in temps[:]: - if temp != ret: - try: - os.unlink(temp) - except os.error: - pass - temps.remove(temp) - return ret - -def _toaiff(filename, temps): - if filename[-2:] == '.Z': - (fd, fname) = tempfile.mkstemp() - os.close(fd) - temps.append(fname) - sts = uncompress.copy(filename, fname) - if sts: - raise error, filename + ': uncompress failed' - else: - fname = filename - try: - ftype = sndhdr.whathdr(fname) - if ftype: - ftype = ftype[0] # All we're interested in - except IOError, msg: - if type(msg) == type(()) and len(msg) == 2 and \ - type(msg[0]) == type(0) and type(msg[1]) == type(''): - msg = msg[1] - if type(msg) != type(''): - msg = repr(msg) - raise error, filename + ': ' + msg - if ftype == 'aiff': - return fname - if ftype is None or not ftype in table: - raise error, '%s: unsupported audio file type %r' % (filename, ftype) - (fd, temp) = tempfile.mkstemp() - os.close(fd) - temps.append(temp) - sts = table[ftype].copy(fname, temp) - if sts: - raise error, filename + ': conversion to aiff failed' - return temp diff --git a/python/Lib/token.py b/python/Lib/token.py deleted file mode 100755 index 45825b4a77..0000000000 --- a/python/Lib/token.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Token 
constants (from "token.h").""" - -# This file is automatically generated; please don't muck it up! -# -# To update the symbols in this file, 'cd' to the top directory of -# the python source tree after building the interpreter and run: -# -# ./python Lib/token.py - -#--start constants-- -ENDMARKER = 0 -NAME = 1 -NUMBER = 2 -STRING = 3 -NEWLINE = 4 -INDENT = 5 -DEDENT = 6 -LPAR = 7 -RPAR = 8 -LSQB = 9 -RSQB = 10 -COLON = 11 -COMMA = 12 -SEMI = 13 -PLUS = 14 -MINUS = 15 -STAR = 16 -SLASH = 17 -VBAR = 18 -AMPER = 19 -LESS = 20 -GREATER = 21 -EQUAL = 22 -DOT = 23 -PERCENT = 24 -BACKQUOTE = 25 -LBRACE = 26 -RBRACE = 27 -EQEQUAL = 28 -NOTEQUAL = 29 -LESSEQUAL = 30 -GREATEREQUAL = 31 -TILDE = 32 -CIRCUMFLEX = 33 -LEFTSHIFT = 34 -RIGHTSHIFT = 35 -DOUBLESTAR = 36 -PLUSEQUAL = 37 -MINEQUAL = 38 -STAREQUAL = 39 -SLASHEQUAL = 40 -PERCENTEQUAL = 41 -AMPEREQUAL = 42 -VBAREQUAL = 43 -CIRCUMFLEXEQUAL = 44 -LEFTSHIFTEQUAL = 45 -RIGHTSHIFTEQUAL = 46 -DOUBLESTAREQUAL = 47 -DOUBLESLASH = 48 -DOUBLESLASHEQUAL = 49 -AT = 50 -OP = 51 -ERRORTOKEN = 52 -N_TOKENS = 53 -NT_OFFSET = 256 -#--end constants-- - -tok_name = {} -for _name, _value in globals().items(): - if type(_value) is type(0): - tok_name[_value] = _name -del _name, _value - - -def ISTERMINAL(x): - return x < NT_OFFSET - -def ISNONTERMINAL(x): - return x >= NT_OFFSET - -def ISEOF(x): - return x == ENDMARKER - - -def main(): - import re - import sys - args = sys.argv[1:] - inFileName = args and args[0] or "Include/token.h" - outFileName = "Lib/token.py" - if len(args) > 1: - outFileName = args[1] - try: - fp = open(inFileName) - except IOError, err: - sys.stdout.write("I/O error: %s\n" % str(err)) - sys.exit(1) - lines = fp.read().split("\n") - fp.close() - prog = re.compile( - "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)", - re.IGNORECASE) - tokens = {} - for line in lines: - match = prog.match(line) - if match: - name, val = match.group(1, 2) - val = int(val) - tokens[val] = name # reverse so we can sort 
them... - keys = tokens.keys() - keys.sort() - # load the output skeleton from the target: - try: - fp = open(outFileName) - except IOError, err: - sys.stderr.write("I/O error: %s\n" % str(err)) - sys.exit(2) - format = fp.read().split("\n") - fp.close() - try: - start = format.index("#--start constants--") + 1 - end = format.index("#--end constants--") - except ValueError: - sys.stderr.write("target does not contain format markers") - sys.exit(3) - lines = [] - for val in keys: - lines.append("%s = %d" % (tokens[val], val)) - format[start:end] = lines - try: - fp = open(outFileName, 'w') - except IOError, err: - sys.stderr.write("I/O error: %s\n" % str(err)) - sys.exit(4) - fp.write("\n".join(format)) - fp.close() - - -if __name__ == "__main__": - main() diff --git a/python/Lib/tokenize.py b/python/Lib/tokenize.py deleted file mode 100755 index d426cd2df5..0000000000 --- a/python/Lib/tokenize.py +++ /dev/null @@ -1,449 +0,0 @@ -"""Tokenization help for Python programs. - -generate_tokens(readline) is a generator that breaks a stream of -text into Python tokens. It accepts a readline-like method which is called -repeatedly to get the next line of input (or "" for EOF). 
It generates -5-tuples with these members: - - the token type (see token.py) - the token (a string) - the starting (row, column) indices of the token (a 2-tuple of ints) - the ending (row, column) indices of the token (a 2-tuple of ints) - the original line (string) - -It is designed to match the working of the Python tokenizer exactly, except -that it produces COMMENT tokens for comments and gives type OP for all -operators - -Older entry points - tokenize_loop(readline, tokeneater) - tokenize(readline, tokeneater=printtoken) -are the same, except instead of generating tokens, tokeneater is a callback -function to which the 5 fields described above are passed as 5 arguments, -each time a new token is found.""" - -__author__ = 'Ka-Ping Yee ' -__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' - 'Skip Montanaro, Raymond Hettinger') - -from itertools import chain -import string, re -from token import * - -import token -__all__ = [x for x in dir(token) if not x.startswith("_")] -__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"] -del x -del token - -COMMENT = N_TOKENS -tok_name[COMMENT] = 'COMMENT' -NL = N_TOKENS + 1 -tok_name[NL] = 'NL' -N_TOKENS += 2 - -def group(*choices): return '(' + '|'.join(choices) + ')' -def any(*choices): return group(*choices) + '*' -def maybe(*choices): return group(*choices) + '?' - -Whitespace = r'[ \f\t]*' -Comment = r'#[^\r\n]*' -Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) -Name = r'[a-zA-Z_]\w*' - -Hexnumber = r'0[xX][\da-fA-F]+[lL]?' -Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?' -Binnumber = r'0[bB][01]+[lL]?' -Decnumber = r'[1-9]\d*[lL]?' 
-Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?\d+' -Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent) -Expfloat = r'\d+' + Exponent -Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]') -Number = group(Imagnumber, Floatnumber, Intnumber) - -# Tail end of ' string. -Single = r"[^'\\]*(?:\\.[^'\\]*)*'" -# Tail end of " string. -Double = r'[^"\\]*(?:\\.[^"\\]*)*"' -# Tail end of ''' string. -Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" -# Tail end of """ string. -Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' -Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""') -# Single-line ' or " string. -String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'", - r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"') - -# Because of leftmost-then-longest match semantics, be sure to put the -# longest operators first (e.g., if = came before ==, == would get -# recognized as two instances of =). -Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=", - r"//=?", - r"[+\-*/%&|^=<>]=?", - r"~") - -Bracket = '[][(){}]' -Special = group(r'\r?\n', r'[:;.,`@]') -Funny = group(Operator, Bracket, Special) - -PlainToken = group(Number, Funny, String, Name) -Token = Ignore + PlainToken - -# First (or only) line of ' or " string. 
-ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + - group("'", r'\\\r?\n'), - r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + - group('"', r'\\\r?\n')) -PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple) -PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) - -tokenprog, pseudoprog, single3prog, double3prog = map( - re.compile, (Token, PseudoToken, Single3, Double3)) -endprogs = {"'": re.compile(Single), '"': re.compile(Double), - "'''": single3prog, '"""': double3prog, - "r'''": single3prog, 'r"""': double3prog, - "u'''": single3prog, 'u"""': double3prog, - "ur'''": single3prog, 'ur"""': double3prog, - "R'''": single3prog, 'R"""': double3prog, - "U'''": single3prog, 'U"""': double3prog, - "uR'''": single3prog, 'uR"""': double3prog, - "Ur'''": single3prog, 'Ur"""': double3prog, - "UR'''": single3prog, 'UR"""': double3prog, - "b'''": single3prog, 'b"""': double3prog, - "br'''": single3prog, 'br"""': double3prog, - "B'''": single3prog, 'B"""': double3prog, - "bR'''": single3prog, 'bR"""': double3prog, - "Br'''": single3prog, 'Br"""': double3prog, - "BR'''": single3prog, 'BR"""': double3prog, - 'r': None, 'R': None, 'u': None, 'U': None, - 'b': None, 'B': None} - -triple_quoted = {} -for t in ("'''", '"""', - "r'''", 'r"""', "R'''", 'R"""', - "u'''", 'u"""', "U'''", 'U"""', - "ur'''", 'ur"""', "Ur'''", 'Ur"""', - "uR'''", 'uR"""', "UR'''", 'UR"""', - "b'''", 'b"""', "B'''", 'B"""', - "br'''", 'br"""', "Br'''", 'Br"""', - "bR'''", 'bR"""', "BR'''", 'BR"""'): - triple_quoted[t] = t -single_quoted = {} -for t in ("'", '"', - "r'", 'r"', "R'", 'R"', - "u'", 'u"', "U'", 'U"', - "ur'", 'ur"', "Ur'", 'Ur"', - "uR'", 'uR"', "UR'", 'UR"', - "b'", 'b"', "B'", 'B"', - "br'", 'br"', "Br'", 'Br"', - "bR'", 'bR"', "BR'", 'BR"' ): - single_quoted[t] = t - -tabsize = 8 - -class TokenError(Exception): pass - -class StopTokenizing(Exception): pass - -def printtoken(type, token, srow_scol, erow_ecol, line): # for testing - srow, scol = srow_scol - 
erow, ecol = erow_ecol - print "%d,%d-%d,%d:\t%s\t%s" % \ - (srow, scol, erow, ecol, tok_name[type], repr(token)) - -def tokenize(readline, tokeneater=printtoken): - """ - The tokenize() function accepts two parameters: one representing the - input stream, and one providing an output mechanism for tokenize(). - - The first parameter, readline, must be a callable object which provides - the same interface as the readline() method of built-in file objects. - Each call to the function should return one line of input as a string. - - The second parameter, tokeneater, must also be a callable object. It is - called once for each token, with five arguments, corresponding to the - tuples generated by generate_tokens(). - """ - try: - tokenize_loop(readline, tokeneater) - except StopTokenizing: - pass - -# backwards compatible interface -def tokenize_loop(readline, tokeneater): - for token_info in generate_tokens(readline): - tokeneater(*token_info) - -class Untokenizer: - - def __init__(self): - self.tokens = [] - self.prev_row = 1 - self.prev_col = 0 - - def add_whitespace(self, start): - row, col = start - if row < self.prev_row or row == self.prev_row and col < self.prev_col: - raise ValueError("start ({},{}) precedes previous end ({},{})" - .format(row, col, self.prev_row, self.prev_col)) - row_offset = row - self.prev_row - if row_offset: - self.tokens.append("\\\n" * row_offset) - self.prev_col = 0 - col_offset = col - self.prev_col - if col_offset: - self.tokens.append(" " * col_offset) - - def untokenize(self, iterable): - it = iter(iterable) - indents = [] - startline = False - for t in it: - if len(t) == 2: - self.compat(t, it) - break - tok_type, token, start, end, line = t - if tok_type == ENDMARKER: - break - if tok_type == INDENT: - indents.append(token) - continue - elif tok_type == DEDENT: - indents.pop() - self.prev_row, self.prev_col = end - continue - elif tok_type in (NEWLINE, NL): - startline = True - elif startline and indents: - indent = indents[-1] 
- if start[1] >= len(indent): - self.tokens.append(indent) - self.prev_col = len(indent) - startline = False - self.add_whitespace(start) - self.tokens.append(token) - self.prev_row, self.prev_col = end - if tok_type in (NEWLINE, NL): - self.prev_row += 1 - self.prev_col = 0 - return "".join(self.tokens) - - def compat(self, token, iterable): - indents = [] - toks_append = self.tokens.append - startline = token[0] in (NEWLINE, NL) - prevstring = False - - for tok in chain([token], iterable): - toknum, tokval = tok[:2] - - if toknum in (NAME, NUMBER): - tokval += ' ' - - # Insert a space between two consecutive strings - if toknum == STRING: - if prevstring: - tokval = ' ' + tokval - prevstring = True - else: - prevstring = False - - if toknum == INDENT: - indents.append(tokval) - continue - elif toknum == DEDENT: - indents.pop() - continue - elif toknum in (NEWLINE, NL): - startline = True - elif startline and indents: - toks_append(indents[-1]) - startline = False - toks_append(tokval) - -def untokenize(iterable): - """Transform tokens back into Python source code. - - Each element returned by the iterable must be a token sequence - with at least two elements, a token number and token value. If - only two tokens are passed, the resulting output is poor. - - Round-trip invariant for full input: - Untokenized source will match input source exactly - - Round-trip invariant for limited intput: - # Output text will tokenize the back to the input - t1 = [tok[:2] for tok in generate_tokens(f.readline)] - newcode = untokenize(t1) - readline = iter(newcode.splitlines(1)).next - t2 = [tok[:2] for tok in generate_tokens(readline)] - assert t1 == t2 - """ - ut = Untokenizer() - return ut.untokenize(iterable) - -def generate_tokens(readline): - """ - The generate_tokens() generator requires one argument, readline, which - must be a callable object which provides the same interface as the - readline() method of built-in file objects. 
Each call to the function - should return one line of input as a string. Alternately, readline - can be a callable function terminating with StopIteration: - readline = open(myfile).next # Example of alternate readline - - The generator produces 5-tuples with these members: the token type; the - token string; a 2-tuple (srow, scol) of ints specifying the row and - column where the token begins in the source; a 2-tuple (erow, ecol) of - ints specifying the row and column where the token ends in the source; - and the line on which the token was found. The line passed is the - logical line; continuation lines are included. - """ - lnum = parenlev = continued = 0 - namechars, numchars = string.ascii_letters + '_', '0123456789' - contstr, needcont = '', 0 - contline = None - indents = [0] - - while 1: # loop over lines in stream - try: - line = readline() - except StopIteration: - line = '' - lnum += 1 - pos, max = 0, len(line) - - if contstr: # continued string - if not line: - raise TokenError, ("EOF in multi-line string", strstart) - endmatch = endprog.match(line) - if endmatch: - pos = end = endmatch.end(0) - yield (STRING, contstr + line[:end], - strstart, (lnum, end), contline + line) - contstr, needcont = '', 0 - contline = None - elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': - yield (ERRORTOKEN, contstr + line, - strstart, (lnum, len(line)), contline) - contstr = '' - contline = None - continue - else: - contstr = contstr + line - contline = contline + line - continue - - elif parenlev == 0 and not continued: # new statement - if not line: break - column = 0 - while pos < max: # measure leading whitespace - if line[pos] == ' ': - column += 1 - elif line[pos] == '\t': - column = (column//tabsize + 1)*tabsize - elif line[pos] == '\f': - column = 0 - else: - break - pos += 1 - if pos == max: - break - - if line[pos] in '#\r\n': # skip comments or blank lines - if line[pos] == '#': - comment_token = line[pos:].rstrip('\r\n') - nl_pos = pos + 
len(comment_token) - yield (COMMENT, comment_token, - (lnum, pos), (lnum, pos + len(comment_token)), line) - yield (NL, line[nl_pos:], - (lnum, nl_pos), (lnum, len(line)), line) - else: - yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], - (lnum, pos), (lnum, len(line)), line) - continue - - if column > indents[-1]: # count indents or dedents - indents.append(column) - yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) - while column < indents[-1]: - if column not in indents: - raise IndentationError( - "unindent does not match any outer indentation level", - ("", lnum, pos, line)) - indents = indents[:-1] - yield (DEDENT, '', (lnum, pos), (lnum, pos), line) - - else: # continued statement - if not line: - raise TokenError, ("EOF in multi-line statement", (lnum, 0)) - continued = 0 - - while pos < max: - pseudomatch = pseudoprog.match(line, pos) - if pseudomatch: # scan for tokens - start, end = pseudomatch.span(1) - spos, epos, pos = (lnum, start), (lnum, end), end - if start == end: - continue - token, initial = line[start:end], line[start] - - if initial in numchars or \ - (initial == '.' 
and token != '.'): # ordinary number - yield (NUMBER, token, spos, epos, line) - elif initial in '\r\n': - yield (NL if parenlev > 0 else NEWLINE, - token, spos, epos, line) - elif initial == '#': - assert not token.endswith("\n") - yield (COMMENT, token, spos, epos, line) - elif token in triple_quoted: - endprog = endprogs[token] - endmatch = endprog.match(line, pos) - if endmatch: # all on one line - pos = endmatch.end(0) - token = line[start:pos] - yield (STRING, token, spos, (lnum, pos), line) - else: - strstart = (lnum, start) # multiple lines - contstr = line[start:] - contline = line - break - elif initial in single_quoted or \ - token[:2] in single_quoted or \ - token[:3] in single_quoted: - if token[-1] == '\n': # continued string - strstart = (lnum, start) - endprog = (endprogs[initial] or endprogs[token[1]] or - endprogs[token[2]]) - contstr, needcont = line[start:], 1 - contline = line - break - else: # ordinary string - yield (STRING, token, spos, epos, line) - elif initial in namechars: # ordinary name - yield (NAME, token, spos, epos, line) - elif initial == '\\': # continued stmt - continued = 1 - else: - if initial in '([{': - parenlev += 1 - elif initial in ')]}': - parenlev -= 1 - yield (OP, token, spos, epos, line) - else: - yield (ERRORTOKEN, line[pos], - (lnum, pos), (lnum, pos+1), line) - pos += 1 - - for indent in indents[1:]: # pop remaining indent levels - yield (DEDENT, '', (lnum, 0), (lnum, 0), '') - yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') - -if __name__ == '__main__': # testing - import sys - if len(sys.argv) > 1: - tokenize(open(sys.argv[1]).readline) - else: - tokenize(sys.stdin.readline) diff --git a/python/Lib/trace.py b/python/Lib/trace.py deleted file mode 100755 index 38a13e2a9f..0000000000 --- a/python/Lib/trace.py +++ /dev/null @@ -1,819 +0,0 @@ -#!/usr/bin/env python - -# portions copyright 2001, Autonomous Zones Industries, Inc., all rights... -# err... 
reserved and offered to the public under the terms of the -# Python 2.2 license. -# Author: Zooko O'Whielacronx -# http://zooko.com/ -# mailto:zooko@zooko.com -# -# Copyright 2000, Mojam Media, Inc., all rights reserved. -# Author: Skip Montanaro -# -# Copyright 1999, Bioreason, Inc., all rights reserved. -# Author: Andrew Dalke -# -# Copyright 1995-1997, Automatrix, Inc., all rights reserved. -# Author: Skip Montanaro -# -# Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved. -# -# -# Permission to use, copy, modify, and distribute this Python software and -# its associated documentation for any purpose without fee is hereby -# granted, provided that the above copyright notice appears in all copies, -# and that both that copyright notice and this permission notice appear in -# supporting documentation, and that the name of neither Automatrix, -# Bioreason or Mojam Media be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior permission. -# -"""program/module to trace Python program or function execution - -Sample use, command line: - trace.py -c -f counts --ignore-dir '$prefix' spam.py eggs - trace.py -t --ignore-dir '$prefix' spam.py eggs - trace.py --trackcalls spam.py eggs - -Sample use, programmatically - import sys - - # create a Trace object, telling it what to ignore, and whether to - # do tracing or line-counting or both. 
- tracer = trace.Trace(ignoredirs=[sys.prefix, sys.exec_prefix,], trace=0, - count=1) - # run the new command using the given tracer - tracer.run('main()') - # make a report, placing output in /tmp - r = tracer.results() - r.write_results(show_missing=True, coverdir="/tmp") -""" - -import linecache -import os -import re -import sys -import time -import token -import tokenize -import inspect -import gc -import dis -try: - import cPickle - pickle = cPickle -except ImportError: - import pickle - -try: - import threading -except ImportError: - _settrace = sys.settrace - - def _unsettrace(): - sys.settrace(None) -else: - def _settrace(func): - threading.settrace(func) - sys.settrace(func) - - def _unsettrace(): - sys.settrace(None) - threading.settrace(None) - -def usage(outfile): - outfile.write("""Usage: %s [OPTIONS] [ARGS] - -Meta-options: ---help Display this help then exit. ---version Output version information then exit. - -Otherwise, exactly one of the following three options must be given: --t, --trace Print each line to sys.stdout before it is executed. --c, --count Count the number of times each line is executed - and write the counts to .cover for each - module executed, in the module's directory. - See also `--coverdir', `--file', `--no-report' below. --l, --listfuncs Keep track of which functions are executed at least - once and write the results to sys.stdout after the - program exits. --T, --trackcalls Keep track of caller/called pairs and write the - results to sys.stdout after the program exits. --r, --report Generate a report from a counts file; do not execute - any code. `--file' must specify the results file to - read, which must have been created in a previous run - with `--count --file=FILE'. - -Modifiers: --f, --file= File to accumulate counts over several runs. --R, --no-report Do not generate the coverage report files. - Useful if you want to accumulate over several runs. --C, --coverdir= Directory where the report files. 
The coverage - report for . is written to file - //.cover. --m, --missing Annotate executable lines that were not executed - with '>>>>>> '. --s, --summary Write a brief summary on stdout for each file. - (Can only be used with --count or --report.) --g, --timing Prefix each line with the time since the program started. - Only used while tracing. - -Filters, may be repeated multiple times: ---ignore-module= Ignore the given module(s) and its submodules - (if it is a package). Accepts comma separated - list of module names ---ignore-dir= Ignore files in the given directory (multiple - directories can be joined by os.pathsep). -""" % sys.argv[0]) - -PRAGMA_NOCOVER = "#pragma NO COVER" - -# Simple rx to find lines with no code. -rx_blank = re.compile(r'^\s*(#.*)?$') - -class Ignore: - def __init__(self, modules = None, dirs = None): - self._mods = modules or [] - self._dirs = dirs or [] - - self._dirs = map(os.path.normpath, self._dirs) - self._ignore = { '': 1 } - - def names(self, filename, modulename): - if modulename in self._ignore: - return self._ignore[modulename] - - # haven't seen this one before, so see if the module name is - # on the ignore list. Need to take some care since ignoring - # "cmp" musn't mean ignoring "cmpcache" but ignoring - # "Spam" must also mean ignoring "Spam.Eggs". 
- for mod in self._mods: - if mod == modulename: # Identical names, so ignore - self._ignore[modulename] = 1 - return 1 - # check if the module is a proper submodule of something on - # the ignore list - n = len(mod) - # (will not overflow since if the first n characters are the - # same and the name has not already occurred, then the size - # of "name" is greater than that of "mod") - if mod == modulename[:n] and modulename[n] == '.': - self._ignore[modulename] = 1 - return 1 - - # Now check that __file__ isn't in one of the directories - if filename is None: - # must be a built-in, so we must ignore - self._ignore[modulename] = 1 - return 1 - - # Ignore a file when it contains one of the ignorable paths - for d in self._dirs: - # The '+ os.sep' is to ensure that d is a parent directory, - # as compared to cases like: - # d = "/usr/local" - # filename = "/usr/local.py" - # or - # d = "/usr/local.py" - # filename = "/usr/local.py" - if filename.startswith(d + os.sep): - self._ignore[modulename] = 1 - return 1 - - # Tried the different ways, so we don't ignore this module - self._ignore[modulename] = 0 - return 0 - -def modname(path): - """Return a plausible module name for the patch.""" - - base = os.path.basename(path) - filename, ext = os.path.splitext(base) - return filename - -def fullmodname(path): - """Return a plausible module name for the path.""" - - # If the file 'path' is part of a package, then the filename isn't - # enough to uniquely identify it. Try to do the right thing by - # looking in sys.path for the longest matching prefix. We'll - # assume that the rest is the package name. 
- - comparepath = os.path.normcase(path) - longest = "" - for dir in sys.path: - dir = os.path.normcase(dir) - if comparepath.startswith(dir) and comparepath[len(dir)] == os.sep: - if len(dir) > len(longest): - longest = dir - - if longest: - base = path[len(longest) + 1:] - else: - base = path - # the drive letter is never part of the module name - drive, base = os.path.splitdrive(base) - base = base.replace(os.sep, ".") - if os.altsep: - base = base.replace(os.altsep, ".") - filename, ext = os.path.splitext(base) - return filename.lstrip(".") - -class CoverageResults: - def __init__(self, counts=None, calledfuncs=None, infile=None, - callers=None, outfile=None): - self.counts = counts - if self.counts is None: - self.counts = {} - self.counter = self.counts.copy() # map (filename, lineno) to count - self.calledfuncs = calledfuncs - if self.calledfuncs is None: - self.calledfuncs = {} - self.calledfuncs = self.calledfuncs.copy() - self.callers = callers - if self.callers is None: - self.callers = {} - self.callers = self.callers.copy() - self.infile = infile - self.outfile = outfile - if self.infile: - # Try to merge existing counts file. 
- try: - counts, calledfuncs, callers = \ - pickle.load(open(self.infile, 'rb')) - self.update(self.__class__(counts, calledfuncs, callers)) - except (IOError, EOFError, ValueError), err: - print >> sys.stderr, ("Skipping counts file %r: %s" - % (self.infile, err)) - - def update(self, other): - """Merge in the data from another CoverageResults""" - counts = self.counts - calledfuncs = self.calledfuncs - callers = self.callers - other_counts = other.counts - other_calledfuncs = other.calledfuncs - other_callers = other.callers - - for key in other_counts.keys(): - counts[key] = counts.get(key, 0) + other_counts[key] - - for key in other_calledfuncs.keys(): - calledfuncs[key] = 1 - - for key in other_callers.keys(): - callers[key] = 1 - - def write_results(self, show_missing=True, summary=False, coverdir=None): - """ - @param coverdir - """ - if self.calledfuncs: - print - print "functions called:" - calls = self.calledfuncs.keys() - calls.sort() - for filename, modulename, funcname in calls: - print ("filename: %s, modulename: %s, funcname: %s" - % (filename, modulename, funcname)) - - if self.callers: - print - print "calling relationships:" - calls = self.callers.keys() - calls.sort() - lastfile = lastcfile = "" - for ((pfile, pmod, pfunc), (cfile, cmod, cfunc)) in calls: - if pfile != lastfile: - print - print "***", pfile, "***" - lastfile = pfile - lastcfile = "" - if cfile != pfile and lastcfile != cfile: - print " -->", cfile - lastcfile = cfile - print " %s.%s -> %s.%s" % (pmod, pfunc, cmod, cfunc) - - # turn the counts data ("(filename, lineno) = count") into something - # accessible on a per-file basis - per_file = {} - for filename, lineno in self.counts.keys(): - lines_hit = per_file[filename] = per_file.get(filename, {}) - lines_hit[lineno] = self.counts[(filename, lineno)] - - # accumulate summary info, if needed - sums = {} - - for filename, count in per_file.iteritems(): - # skip some "files" we don't care about... 
- if filename == "": - continue - if filename.startswith("> sys.stderr, "Can't save counts files because %s" % err - - def write_results_file(self, path, lines, lnotab, lines_hit): - """Return a coverage results file in path.""" - - try: - outfile = open(path, "w") - except IOError, err: - print >> sys.stderr, ("trace: Could not open %r for writing: %s" - "- skipping" % (path, err)) - return 0, 0 - - n_lines = 0 - n_hits = 0 - for i, line in enumerate(lines): - lineno = i + 1 - # do the blank/comment match to try to mark more lines - # (help the reader find stuff that hasn't been covered) - if lineno in lines_hit: - outfile.write("%5d: " % lines_hit[lineno]) - n_hits += 1 - n_lines += 1 - elif rx_blank.match(line): - outfile.write(" ") - else: - # lines preceded by no marks weren't hit - # Highlight them if so indicated, unless the line contains - # #pragma: NO COVER - if lineno in lnotab and not PRAGMA_NOCOVER in lines[i]: - outfile.write(">>>>>> ") - n_lines += 1 - else: - outfile.write(" ") - outfile.write(lines[i].expandtabs(8)) - outfile.close() - - return n_hits, n_lines - -def find_lines_from_code(code, strs): - """Return dict where keys are lines in the line number table.""" - linenos = {} - - for _, lineno in dis.findlinestarts(code): - if lineno not in strs: - linenos[lineno] = 1 - - return linenos - -def find_lines(code, strs): - """Return lineno dict for all code objects reachable from code.""" - # get all of the lineno information from the code of this scope level - linenos = find_lines_from_code(code, strs) - - # and check the constants for references to other code objects - for c in code.co_consts: - if inspect.iscode(c): - # find another code object, so recurse into it - linenos.update(find_lines(c, strs)) - return linenos - -def find_strings(filename): - """Return a dict of possible docstring positions. - - The dict maps line numbers to strings. There is an entry for - line that contains only a string or a part of a triple-quoted - string. 
- """ - d = {} - # If the first token is a string, then it's the module docstring. - # Add this special case so that the test in the loop passes. - prev_ttype = token.INDENT - f = open(filename) - for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline): - if ttype == token.STRING: - if prev_ttype == token.INDENT: - sline, scol = start - eline, ecol = end - for i in range(sline, eline + 1): - d[i] = 1 - prev_ttype = ttype - f.close() - return d - -def find_executable_linenos(filename): - """Return dict where keys are line numbers in the line number table.""" - try: - prog = open(filename, "rU").read() - except IOError, err: - print >> sys.stderr, ("Not printing coverage data for %r: %s" - % (filename, err)) - return {} - code = compile(prog, filename, "exec") - strs = find_strings(filename) - return find_lines(code, strs) - -class Trace: - def __init__(self, count=1, trace=1, countfuncs=0, countcallers=0, - ignoremods=(), ignoredirs=(), infile=None, outfile=None, - timing=False): - """ - @param count true iff it should count number of times each - line is executed - @param trace true iff it should print out each line that is - being counted - @param countfuncs true iff it should just output a list of - (filename, modulename, funcname,) for functions - that were called at least once; This overrides - `count' and `trace' - @param ignoremods a list of the names of modules to ignore - @param ignoredirs a list of the names of directories to ignore - all of the (recursive) contents of - @param infile file from which to read stored counts to be - added into the results - @param outfile file in which to write the results - @param timing true iff timing information be displayed - """ - self.infile = infile - self.outfile = outfile - self.ignore = Ignore(ignoremods, ignoredirs) - self.counts = {} # keys are (filename, linenumber) - self.blabbed = {} # for debugging - self.pathtobasename = {} # for memoizing os.path.basename - self.donothing = 0 - self.trace 
= trace - self._calledfuncs = {} - self._callers = {} - self._caller_cache = {} - self.start_time = None - if timing: - self.start_time = time.time() - if countcallers: - self.globaltrace = self.globaltrace_trackcallers - elif countfuncs: - self.globaltrace = self.globaltrace_countfuncs - elif trace and count: - self.globaltrace = self.globaltrace_lt - self.localtrace = self.localtrace_trace_and_count - elif trace: - self.globaltrace = self.globaltrace_lt - self.localtrace = self.localtrace_trace - elif count: - self.globaltrace = self.globaltrace_lt - self.localtrace = self.localtrace_count - else: - # Ahem -- do nothing? Okay. - self.donothing = 1 - - def run(self, cmd): - import __main__ - dict = __main__.__dict__ - self.runctx(cmd, dict, dict) - - def runctx(self, cmd, globals=None, locals=None): - if globals is None: globals = {} - if locals is None: locals = {} - if not self.donothing: - _settrace(self.globaltrace) - try: - exec cmd in globals, locals - finally: - if not self.donothing: - _unsettrace() - - def runfunc(self, func, *args, **kw): - result = None - if not self.donothing: - sys.settrace(self.globaltrace) - try: - result = func(*args, **kw) - finally: - if not self.donothing: - sys.settrace(None) - return result - - def file_module_function_of(self, frame): - code = frame.f_code - filename = code.co_filename - if filename: - modulename = modname(filename) - else: - modulename = None - - funcname = code.co_name - clsname = None - if code in self._caller_cache: - if self._caller_cache[code] is not None: - clsname = self._caller_cache[code] - else: - self._caller_cache[code] = None - ## use of gc.get_referrers() was suggested by Michael Hudson - # all functions which refer to this code object - funcs = [f for f in gc.get_referrers(code) - if inspect.isfunction(f)] - # require len(func) == 1 to avoid ambiguity caused by calls to - # new.function(): "In the face of ambiguity, refuse the - # temptation to guess." 
- if len(funcs) == 1: - dicts = [d for d in gc.get_referrers(funcs[0]) - if isinstance(d, dict)] - if len(dicts) == 1: - classes = [c for c in gc.get_referrers(dicts[0]) - if hasattr(c, "__bases__")] - if len(classes) == 1: - # ditto for new.classobj() - clsname = classes[0].__name__ - # cache the result - assumption is that new.* is - # not called later to disturb this relationship - # _caller_cache could be flushed if functions in - # the new module get called. - self._caller_cache[code] = clsname - if clsname is not None: - funcname = "%s.%s" % (clsname, funcname) - - return filename, modulename, funcname - - def globaltrace_trackcallers(self, frame, why, arg): - """Handler for call events. - - Adds information about who called who to the self._callers dict. - """ - if why == 'call': - # XXX Should do a better job of identifying methods - this_func = self.file_module_function_of(frame) - parent_func = self.file_module_function_of(frame.f_back) - self._callers[(parent_func, this_func)] = 1 - - def globaltrace_countfuncs(self, frame, why, arg): - """Handler for call events. - - Adds (filename, modulename, funcname) to the self._calledfuncs dict. - """ - if why == 'call': - this_func = self.file_module_function_of(frame) - self._calledfuncs[this_func] = 1 - - def globaltrace_lt(self, frame, why, arg): - """Handler for call events. - - If the code block being entered is to be ignored, returns `None', - else returns self.localtrace. 
- """ - if why == 'call': - code = frame.f_code - filename = frame.f_globals.get('__file__', None) - if filename: - # XXX modname() doesn't work right for packages, so - # the ignore support won't work right for packages - modulename = modname(filename) - if modulename is not None: - ignore_it = self.ignore.names(filename, modulename) - if not ignore_it: - if self.trace: - print (" --- modulename: %s, funcname: %s" - % (modulename, code.co_name)) - return self.localtrace - else: - return None - - def localtrace_trace_and_count(self, frame, why, arg): - if why == "line": - # record the file name and line number of every trace - filename = frame.f_code.co_filename - lineno = frame.f_lineno - key = filename, lineno - self.counts[key] = self.counts.get(key, 0) + 1 - - if self.start_time: - print '%.2f' % (time.time() - self.start_time), - bname = os.path.basename(filename) - print "%s(%d): %s" % (bname, lineno, - linecache.getline(filename, lineno)), - return self.localtrace - - def localtrace_trace(self, frame, why, arg): - if why == "line": - # record the file name and line number of every trace - filename = frame.f_code.co_filename - lineno = frame.f_lineno - - if self.start_time: - print '%.2f' % (time.time() - self.start_time), - bname = os.path.basename(filename) - print "%s(%d): %s" % (bname, lineno, - linecache.getline(filename, lineno)), - return self.localtrace - - def localtrace_count(self, frame, why, arg): - if why == "line": - filename = frame.f_code.co_filename - lineno = frame.f_lineno - key = filename, lineno - self.counts[key] = self.counts.get(key, 0) + 1 - return self.localtrace - - def results(self): - return CoverageResults(self.counts, infile=self.infile, - outfile=self.outfile, - calledfuncs=self._calledfuncs, - callers=self._callers) - -def _err_exit(msg): - sys.stderr.write("%s: %s\n" % (sys.argv[0], msg)) - sys.exit(1) - -def main(argv=None): - import getopt - - if argv is None: - argv = sys.argv - try: - opts, prog_argv = 
getopt.getopt(argv[1:], "tcrRf:d:msC:lTg", - ["help", "version", "trace", "count", - "report", "no-report", "summary", - "file=", "missing", - "ignore-module=", "ignore-dir=", - "coverdir=", "listfuncs", - "trackcalls", "timing"]) - - except getopt.error, msg: - sys.stderr.write("%s: %s\n" % (sys.argv[0], msg)) - sys.stderr.write("Try `%s --help' for more information\n" - % sys.argv[0]) - sys.exit(1) - - trace = 0 - count = 0 - report = 0 - no_report = 0 - counts_file = None - missing = 0 - ignore_modules = [] - ignore_dirs = [] - coverdir = None - summary = 0 - listfuncs = False - countcallers = False - timing = False - - for opt, val in opts: - if opt == "--help": - usage(sys.stdout) - sys.exit(0) - - if opt == "--version": - sys.stdout.write("trace 2.0\n") - sys.exit(0) - - if opt == "-T" or opt == "--trackcalls": - countcallers = True - continue - - if opt == "-l" or opt == "--listfuncs": - listfuncs = True - continue - - if opt == "-g" or opt == "--timing": - timing = True - continue - - if opt == "-t" or opt == "--trace": - trace = 1 - continue - - if opt == "-c" or opt == "--count": - count = 1 - continue - - if opt == "-r" or opt == "--report": - report = 1 - continue - - if opt == "-R" or opt == "--no-report": - no_report = 1 - continue - - if opt == "-f" or opt == "--file": - counts_file = val - continue - - if opt == "-m" or opt == "--missing": - missing = 1 - continue - - if opt == "-C" or opt == "--coverdir": - coverdir = val - continue - - if opt == "-s" or opt == "--summary": - summary = 1 - continue - - if opt == "--ignore-module": - for mod in val.split(","): - ignore_modules.append(mod.strip()) - continue - - if opt == "--ignore-dir": - for s in val.split(os.pathsep): - s = os.path.expandvars(s) - # should I also call expanduser? 
(after all, could use $HOME) - - s = s.replace("$prefix", - os.path.join(sys.prefix, "lib", - "python" + sys.version[:3])) - s = s.replace("$exec_prefix", - os.path.join(sys.exec_prefix, "lib", - "python" + sys.version[:3])) - s = os.path.normpath(s) - ignore_dirs.append(s) - continue - - assert 0, "Should never get here" - - if listfuncs and (count or trace): - _err_exit("cannot specify both --listfuncs and (--trace or --count)") - - if not (count or trace or report or listfuncs or countcallers): - _err_exit("must specify one of --trace, --count, --report, " - "--listfuncs, or --trackcalls") - - if report and no_report: - _err_exit("cannot specify both --report and --no-report") - - if report and not counts_file: - _err_exit("--report requires a --file") - - if no_report and len(prog_argv) == 0: - _err_exit("missing name of file to run") - - # everything is ready - if report: - results = CoverageResults(infile=counts_file, outfile=counts_file) - results.write_results(missing, summary=summary, coverdir=coverdir) - else: - sys.argv = prog_argv - progname = prog_argv[0] - sys.path[0] = os.path.split(progname)[0] - - t = Trace(count, trace, countfuncs=listfuncs, - countcallers=countcallers, ignoremods=ignore_modules, - ignoredirs=ignore_dirs, infile=counts_file, - outfile=counts_file, timing=timing) - try: - with open(progname) as fp: - code = compile(fp.read(), progname, 'exec') - # try to emulate __main__ namespace as much as possible - globs = { - '__file__': progname, - '__name__': '__main__', - '__package__': None, - '__cached__': None, - } - t.runctx(code, globs, globs) - except IOError, err: - _err_exit("Cannot run file %r because: %s" % (sys.argv[0], err)) - except SystemExit: - pass - - results = t.results() - - if not no_report: - results.write_results(missing, summary=summary, coverdir=coverdir) - -if __name__=='__main__': - main() diff --git a/python/Lib/traceback.py b/python/Lib/traceback.py deleted file mode 100755 index 0a3dd11df6..0000000000 --- 
a/python/Lib/traceback.py +++ /dev/null @@ -1,320 +0,0 @@ -"""Extract, format and print information about Python stack traces.""" - -import linecache -import sys -import types - -__all__ = ['extract_stack', 'extract_tb', 'format_exception', - 'format_exception_only', 'format_list', 'format_stack', - 'format_tb', 'print_exc', 'format_exc', 'print_exception', - 'print_last', 'print_stack', 'print_tb', 'tb_lineno'] - -def _print(file, str='', terminator='\n'): - file.write(str+terminator) - - -def print_list(extracted_list, file=None): - """Print the list of tuples as returned by extract_tb() or - extract_stack() as a formatted stack trace to the given file.""" - if file is None: - file = sys.stderr - for filename, lineno, name, line in extracted_list: - _print(file, - ' File "%s", line %d, in %s' % (filename,lineno,name)) - if line: - _print(file, ' %s' % line.strip()) - -def format_list(extracted_list): - """Format a list of traceback entry tuples for printing. - - Given a list of tuples as returned by extract_tb() or - extract_stack(), return a list of strings ready for printing. - Each string in the resulting list corresponds to the item with the - same index in the argument list. Each string ends in a newline; - the strings may contain internal newlines as well, for those items - whose source text line is not None. - """ - list = [] - for filename, lineno, name, line in extracted_list: - item = ' File "%s", line %d, in %s\n' % (filename,lineno,name) - if line: - item = item + ' %s\n' % line.strip() - list.append(item) - return list - - -def print_tb(tb, limit=None, file=None): - """Print up to 'limit' stack trace entries from the traceback 'tb'. - - If 'limit' is omitted or None, all entries are printed. If 'file' - is omitted or None, the output goes to sys.stderr; otherwise - 'file' should be an open file or file-like object with a write() - method. 
- """ - if file is None: - file = sys.stderr - if limit is None: - if hasattr(sys, 'tracebacklimit'): - limit = sys.tracebacklimit - n = 0 - while tb is not None and (limit is None or n < limit): - f = tb.tb_frame - lineno = tb.tb_lineno - co = f.f_code - filename = co.co_filename - name = co.co_name - _print(file, - ' File "%s", line %d, in %s' % (filename, lineno, name)) - linecache.checkcache(filename) - line = linecache.getline(filename, lineno, f.f_globals) - if line: _print(file, ' ' + line.strip()) - tb = tb.tb_next - n = n+1 - -def format_tb(tb, limit = None): - """A shorthand for 'format_list(extract_tb(tb, limit))'.""" - return format_list(extract_tb(tb, limit)) - -def extract_tb(tb, limit = None): - """Return list of up to limit pre-processed entries from traceback. - - This is useful for alternate formatting of stack traces. If - 'limit' is omitted or None, all entries are extracted. A - pre-processed stack trace entry is a quadruple (filename, line - number, function name, text) representing the information that is - usually printed for a stack trace. The text is a string with - leading and trailing whitespace stripped; if the source is not - available it is None. - """ - if limit is None: - if hasattr(sys, 'tracebacklimit'): - limit = sys.tracebacklimit - list = [] - n = 0 - while tb is not None and (limit is None or n < limit): - f = tb.tb_frame - lineno = tb.tb_lineno - co = f.f_code - filename = co.co_filename - name = co.co_name - linecache.checkcache(filename) - line = linecache.getline(filename, lineno, f.f_globals) - if line: line = line.strip() - else: line = None - list.append((filename, lineno, name, line)) - tb = tb.tb_next - n = n+1 - return list - - -def print_exception(etype, value, tb, limit=None, file=None): - """Print exception up to 'limit' stack trace entries from 'tb' to 'file'. 
- - This differs from print_tb() in the following ways: (1) if - traceback is not None, it prints a header "Traceback (most recent - call last):"; (2) it prints the exception type and value after the - stack trace; (3) if type is SyntaxError and value has the - appropriate format, it prints the line where the syntax error - occurred with a caret on the next line indicating the approximate - position of the error. - """ - if file is None: - file = sys.stderr - if tb: - _print(file, 'Traceback (most recent call last):') - print_tb(tb, limit, file) - lines = format_exception_only(etype, value) - for line in lines: - _print(file, line, '') - -def format_exception(etype, value, tb, limit = None): - """Format a stack trace and the exception information. - - The arguments have the same meaning as the corresponding arguments - to print_exception(). The return value is a list of strings, each - ending in a newline and some containing internal newlines. When - these lines are concatenated and printed, exactly the same text is - printed as does print_exception(). - """ - if tb: - list = ['Traceback (most recent call last):\n'] - list = list + format_tb(tb, limit) - else: - list = [] - list = list + format_exception_only(etype, value) - return list - -def format_exception_only(etype, value): - """Format the exception part of a traceback. - - The arguments are the exception type and value such as given by - sys.last_type and sys.last_value. The return value is a list of - strings, each ending in a newline. - - Normally, the list contains a single string; however, for - SyntaxError exceptions, it contains several lines that (when - printed) display detailed information about where the syntax - error occurred. - - The message indicating which exception occurred is always the last - string in the list. 
- - """ - - # An instance should not have a meaningful value parameter, but - # sometimes does, particularly for string exceptions, such as - # >>> raise string1, string2 # deprecated - # - # Clear these out first because issubtype(string1, SyntaxError) - # would raise another exception and mask the original problem. - if (isinstance(etype, BaseException) or - isinstance(etype, types.InstanceType) or - etype is None or type(etype) is str): - return [_format_final_exc_line(etype, value)] - - stype = etype.__name__ - - if not issubclass(etype, SyntaxError): - return [_format_final_exc_line(stype, value)] - - # It was a syntax error; show exactly where the problem was found. - lines = [] - try: - msg, (filename, lineno, offset, badline) = value.args - except Exception: - pass - else: - filename = filename or "" - lines.append(' File "%s", line %d\n' % (filename, lineno)) - if badline is not None: - lines.append(' %s\n' % badline.strip()) - if offset is not None: - caretspace = badline.rstrip('\n') - offset = min(len(caretspace), offset) - 1 - caretspace = caretspace[:offset].lstrip() - # non-space whitespace (likes tabs) must be kept for alignment - caretspace = ((c.isspace() and c or ' ') for c in caretspace) - lines.append(' %s^\n' % ''.join(caretspace)) - value = msg - - lines.append(_format_final_exc_line(stype, value)) - return lines - -def _format_final_exc_line(etype, value): - """Return a list of a single line -- normal case for format_exception_only""" - valuestr = _some_str(value) - if value is None or not valuestr: - line = "%s\n" % etype - else: - line = "%s: %s\n" % (etype, valuestr) - return line - -def _some_str(value): - try: - return str(value) - except Exception: - pass - try: - value = unicode(value) - return value.encode("ascii", "backslashreplace") - except Exception: - pass - return '' % type(value).__name__ - - -def print_exc(limit=None, file=None): - """Shorthand for 'print_exception(sys.exc_type, sys.exc_value, sys.exc_traceback, limit, 
file)'. - (In fact, it uses sys.exc_info() to retrieve the same information - in a thread-safe way.)""" - if file is None: - file = sys.stderr - try: - etype, value, tb = sys.exc_info() - print_exception(etype, value, tb, limit, file) - finally: - etype = value = tb = None - - -def format_exc(limit=None): - """Like print_exc() but return a string.""" - try: - etype, value, tb = sys.exc_info() - return ''.join(format_exception(etype, value, tb, limit)) - finally: - etype = value = tb = None - - -def print_last(limit=None, file=None): - """This is a shorthand for 'print_exception(sys.last_type, - sys.last_value, sys.last_traceback, limit, file)'.""" - if not hasattr(sys, "last_type"): - raise ValueError("no last exception") - if file is None: - file = sys.stderr - print_exception(sys.last_type, sys.last_value, sys.last_traceback, - limit, file) - - -def print_stack(f=None, limit=None, file=None): - """Print a stack trace from its invocation point. - - The optional 'f' argument can be used to specify an alternate - stack frame at which to start. The optional 'limit' and 'file' - arguments have the same meaning as for print_exception(). - """ - if f is None: - try: - raise ZeroDivisionError - except ZeroDivisionError: - f = sys.exc_info()[2].tb_frame.f_back - print_list(extract_stack(f, limit), file) - -def format_stack(f=None, limit=None): - """Shorthand for 'format_list(extract_stack(f, limit))'.""" - if f is None: - try: - raise ZeroDivisionError - except ZeroDivisionError: - f = sys.exc_info()[2].tb_frame.f_back - return format_list(extract_stack(f, limit)) - -def extract_stack(f=None, limit = None): - """Extract the raw traceback from the current stack frame. - - The return value has the same format as for extract_tb(). The - optional 'f' and 'limit' arguments have the same meaning as for - print_stack(). Each item in the list is a quadruple (filename, - line number, function name, text), and the entries are in order - from oldest to newest stack frame. 
- """ - if f is None: - try: - raise ZeroDivisionError - except ZeroDivisionError: - f = sys.exc_info()[2].tb_frame.f_back - if limit is None: - if hasattr(sys, 'tracebacklimit'): - limit = sys.tracebacklimit - list = [] - n = 0 - while f is not None and (limit is None or n < limit): - lineno = f.f_lineno - co = f.f_code - filename = co.co_filename - name = co.co_name - linecache.checkcache(filename) - line = linecache.getline(filename, lineno, f.f_globals) - if line: line = line.strip() - else: line = None - list.append((filename, lineno, name, line)) - f = f.f_back - n = n+1 - list.reverse() - return list - -def tb_lineno(tb): - """Calculate correct line number of traceback given in tb. - - Obsolete in 2.3. - """ - return tb.tb_lineno diff --git a/python/Lib/tty.py b/python/Lib/tty.py deleted file mode 100755 index a72eb67554..0000000000 --- a/python/Lib/tty.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Terminal utilities.""" - -# Author: Steen Lumholt. - -from termios import * - -__all__ = ["setraw", "setcbreak"] - -# Indexes for termios list. 
-IFLAG = 0 -OFLAG = 1 -CFLAG = 2 -LFLAG = 3 -ISPEED = 4 -OSPEED = 5 -CC = 6 - -def setraw(fd, when=TCSAFLUSH): - """Put terminal into a raw mode.""" - mode = tcgetattr(fd) - mode[IFLAG] = mode[IFLAG] & ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON) - mode[OFLAG] = mode[OFLAG] & ~(OPOST) - mode[CFLAG] = mode[CFLAG] & ~(CSIZE | PARENB) - mode[CFLAG] = mode[CFLAG] | CS8 - mode[LFLAG] = mode[LFLAG] & ~(ECHO | ICANON | IEXTEN | ISIG) - mode[CC][VMIN] = 1 - mode[CC][VTIME] = 0 - tcsetattr(fd, when, mode) - -def setcbreak(fd, when=TCSAFLUSH): - """Put terminal into a cbreak mode.""" - mode = tcgetattr(fd) - mode[LFLAG] = mode[LFLAG] & ~(ECHO | ICANON) - mode[CC][VMIN] = 1 - mode[CC][VTIME] = 0 - tcsetattr(fd, when, mode) diff --git a/python/Lib/types.py b/python/Lib/types.py deleted file mode 100755 index d414f54931..0000000000 --- a/python/Lib/types.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Define names for all type symbols known in the standard interpreter. - -Types that are part of optional modules (e.g. array) are not listed. -""" -import sys - -# Iterators in Python aren't a matter of type but of protocol. A large -# and changing number of builtin types implement *some* flavor of -# iterator. Don't check the type! Use hasattr to check for both -# "__iter__" and "next" attributes instead. - -NoneType = type(None) -TypeType = type -ObjectType = object - -IntType = int -LongType = long -FloatType = float -BooleanType = bool -try: - ComplexType = complex -except NameError: - pass - -StringType = str - -# StringTypes is already outdated. Instead of writing "type(x) in -# types.StringTypes", you should use "isinstance(x, basestring)". But -# we keep around for compatibility with Python 2.2. 
-try: - UnicodeType = unicode - StringTypes = (StringType, UnicodeType) -except NameError: - StringTypes = (StringType,) - -BufferType = buffer - -TupleType = tuple -ListType = list -DictType = DictionaryType = dict - -def _f(): pass -FunctionType = type(_f) -LambdaType = type(lambda: None) # Same as FunctionType -CodeType = type(_f.func_code) - -def _g(): - yield 1 -GeneratorType = type(_g()) - -class _C: - def _m(self): pass -ClassType = type(_C) -UnboundMethodType = type(_C._m) # Same as MethodType -_x = _C() -InstanceType = type(_x) -MethodType = type(_x._m) - -BuiltinFunctionType = type(len) -BuiltinMethodType = type([].append) # Same as BuiltinFunctionType - -ModuleType = type(sys) -FileType = file -XRangeType = xrange - -try: - raise TypeError -except TypeError: - tb = sys.exc_info()[2] - TracebackType = type(tb) - FrameType = type(tb.tb_frame) - del tb - -SliceType = slice -EllipsisType = type(Ellipsis) - -DictProxyType = type(TypeType.__dict__) -NotImplementedType = type(NotImplemented) - -# For Jython, the following two types are identical -GetSetDescriptorType = type(FunctionType.func_code) -MemberDescriptorType = type(FunctionType.func_globals) - -del sys, _f, _g, _C, _x # Not for export - -__all__ = list(n for n in globals() if n[:1] != '_') diff --git a/python/Lib/unittest.py b/python/Lib/unittest.py deleted file mode 100644 index 09c6ca97c8..0000000000 --- a/python/Lib/unittest.py +++ /dev/null @@ -1,872 +0,0 @@ -#!/usr/bin/env python -''' -Python unit testing framework, based on Erich Gamma's JUnit and Kent Beck's -Smalltalk testing framework. - -This module contains the core framework classes that form the basis of -specific test cases and suites (TestCase, TestSuite etc.), and also a -text-based utility class for running the tests and reporting the results - (TextTestRunner). 
- -Simple usage: - - import unittest - - class IntegerArithmenticTestCase(unittest.TestCase): - def testAdd(self): ## test method names begin 'test*' - self.assertEquals((1 + 2), 3) - self.assertEquals(0 + 1, 1) - def testMultiply(self): - self.assertEquals((0 * 10), 0) - self.assertEquals((5 * 8), 40) - - if __name__ == '__main__': - unittest.main() - -Further information is available in the bundled documentation, and from - - http://docs.python.org/lib/module-unittest.html - -Copyright (c) 1999-2003 Steve Purcell -This module is free software, and you may redistribute it and/or modify -it under the same terms as Python itself, so long as this copyright message -and disclaimer are retained in their original form. - -IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, -SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF -THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGE. - -THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, -AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, -SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
-''' - -__author__ = "Steve Purcell" -__email__ = "stephen_purcell at yahoo dot com" -__version__ = "#Revision: 1.63 $"[11:-2] - -import time -import sys -import traceback -import os -import types - -############################################################################## -# Exported classes and functions -############################################################################## -__all__ = ['TestResult', 'TestCase', 'TestSuite', 'TextTestRunner', - 'TestLoader', 'FunctionTestCase', 'main', 'defaultTestLoader'] - -# Expose obsolete functions for backwards compatibility -__all__.extend(['getTestCaseNames', 'makeSuite', 'findTestCases']) - - -############################################################################## -# Backward compatibility -############################################################################## -if sys.version_info[:2] < (2, 2): - def isinstance(obj, clsinfo): - import __builtin__ - if type(clsinfo) in (tuple, list): - for cls in clsinfo: - if cls is type: cls = types.ClassType - if __builtin__.isinstance(obj, cls): - return 1 - return 0 - else: return __builtin__.isinstance(obj, clsinfo) - -def _CmpToKey(mycmp): - 'Convert a cmp= function into a key= function' - class K(object): - def __init__(self, obj): - self.obj = obj - def __lt__(self, other): - return mycmp(self.obj, other.obj) == -1 - return K - -############################################################################## -# Test framework core -############################################################################## - -# All classes defined herein are 'new-style' classes, allowing use of 'super()' -__metaclass__ = type - -def _strclass(cls): - return "%s.%s" % (cls.__module__, cls.__name__) - -__unittest = 1 - -class TestResult: - """Holder for test result information. - - Test results are automatically managed by the TestCase and TestSuite - classes, and do not need to be explicitly manipulated by writers of tests. 
- - Each instance holds the total number of tests run, and collections of - failures and errors that occurred among those test runs. The collections - contain tuples of (testcase, exceptioninfo), where exceptioninfo is the - formatted traceback of the error that occurred. - """ - def __init__(self): - self.failures = [] - self.errors = [] - self.testsRun = 0 - self.shouldStop = False - - def startTest(self, test): - "Called when the given test is about to be run" - self.testsRun = self.testsRun + 1 - - def stopTest(self, test): - "Called when the given test has been run" - pass - - def addError(self, test, err): - """Called when an error has occurred. 'err' is a tuple of values as - returned by sys.exc_info(). - """ - self.errors.append((test, self._exc_info_to_string(err, test))) - - def addFailure(self, test, err): - """Called when an error has occurred. 'err' is a tuple of values as - returned by sys.exc_info().""" - self.failures.append((test, self._exc_info_to_string(err, test))) - - def addSuccess(self, test): - "Called when a test has completed successfully" - pass - - def wasSuccessful(self): - "Tells whether or not this result was a success" - return len(self.failures) == len(self.errors) == 0 - - def stop(self): - "Indicates that the tests should be aborted" - self.shouldStop = True - - def _exc_info_to_string(self, err, test): - """Converts a sys.exc_info()-style tuple of values into a string.""" - exctype, value, tb = err - # Skip test runner traceback levels - while tb and self._is_relevant_tb_level(tb): - tb = tb.tb_next - if exctype is test.failureException: - # Skip assert*() traceback levels - length = self._count_relevant_tb_levels(tb) - return ''.join(traceback.format_exception(exctype, value, tb, length)) - return ''.join(traceback.format_exception(exctype, value, tb)) - - def _is_relevant_tb_level(self, tb): - return '__unittest' in tb.tb_frame.f_globals - - def _count_relevant_tb_levels(self, tb): - length = 0 - while tb and not 
self._is_relevant_tb_level(tb): - length += 1 - tb = tb.tb_next - return length - - def __repr__(self): - return "<%s run=%i errors=%i failures=%i>" % \ - (_strclass(self.__class__), self.testsRun, len(self.errors), - len(self.failures)) - -class TestCase: - """A class whose instances are single test cases. - - By default, the test code itself should be placed in a method named - 'runTest'. - - If the fixture may be used for many test cases, create as - many test methods as are needed. When instantiating such a TestCase - subclass, specify in the constructor arguments the name of the test method - that the instance is to execute. - - Test authors should subclass TestCase for their own tests. Construction - and deconstruction of the test's environment ('fixture') can be - implemented by overriding the 'setUp' and 'tearDown' methods respectively. - - If it is necessary to override the __init__ method, the base class - __init__ method must always be called. It is important that subclasses - should not change the signature of their __init__ method, since instances - of the classes are instantiated automatically by parts of the framework - in order to be run. - """ - - # This attribute determines which exception will be raised when - # the instance's assertion methods fail; test methods raising this - # exception will be deemed to have 'failed' rather than 'errored' - - failureException = AssertionError - - def __init__(self, methodName='runTest'): - """Create an instance of the class that will use the named test - method when executed. Raises a ValueError if the instance does - not have a method with the specified name. - """ - try: - self._testMethodName = methodName - testMethod = getattr(self, methodName) - self._testMethodDoc = testMethod.__doc__ - except AttributeError: - raise ValueError, "no such test method in %s: %s" % \ - (self.__class__, methodName) - - def setUp(self): - "Hook method for setting up the test fixture before exercising it." 
- pass - - def tearDown(self): - "Hook method for deconstructing the test fixture after testing it." - pass - - def countTestCases(self): - return 1 - - def defaultTestResult(self): - return TestResult() - - def shortDescription(self): - """Returns a one-line description of the test, or None if no - description has been provided. - - The default implementation of this method returns the first line of - the specified test method's docstring. - """ - doc = self._testMethodDoc - return doc and doc.split("\n")[0].strip() or None - - def id(self): - return "%s.%s" % (_strclass(self.__class__), self._testMethodName) - - def __eq__(self, other): - if type(self) is not type(other): - return False - - return self._testMethodName == other._testMethodName - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((type(self), self._testMethodName)) - - def __str__(self): - return "%s (%s)" % (self._testMethodName, _strclass(self.__class__)) - - def __repr__(self): - return "<%s testMethod=%s>" % \ - (_strclass(self.__class__), self._testMethodName) - - def run(self, result=None): - if result is None: result = self.defaultTestResult() - result.startTest(self) - testMethod = getattr(self, self._testMethodName) - try: - try: - self.setUp() - except KeyboardInterrupt: - raise - except: - result.addError(self, self._exc_info()) - return - - ok = False - try: - testMethod() - ok = True - except self.failureException: - result.addFailure(self, self._exc_info()) - except KeyboardInterrupt: - raise - except: - result.addError(self, self._exc_info()) - - try: - self.tearDown() - except KeyboardInterrupt: - raise - except: - result.addError(self, self._exc_info()) - ok = False - if ok: result.addSuccess(self) - finally: - result.stopTest(self) - - def __call__(self, *args, **kwds): - return self.run(*args, **kwds) - - def debug(self): - """Run the test without collecting errors in a TestResult""" - self.setUp() - getattr(self, self._testMethodName)() - 
self.tearDown() - - def _exc_info(self): - """Return a version of sys.exc_info() with the traceback frame - minimised; usually the top level of the traceback frame is not - needed. - """ - return sys.exc_info() - - def fail(self, msg=None): - """Fail immediately, with the given message.""" - raise self.failureException, msg - - def failIf(self, expr, msg=None): - "Fail the test if the expression is true." - if expr: raise self.failureException, msg - - def failUnless(self, expr, msg=None): - """Fail the test unless the expression is true.""" - if not expr: raise self.failureException, msg - - def failUnlessRaises(self, excClass, callableObj, *args, **kwargs): - """Fail unless an exception of class excClass is thrown - by callableObj when invoked with arguments args and keyword - arguments kwargs. If a different type of exception is - thrown, it will not be caught, and the test case will be - deemed to have suffered an error, exactly as for an - unexpected exception. - """ - try: - callableObj(*args, **kwargs) - except excClass: - return - else: - if hasattr(excClass,'__name__'): excName = excClass.__name__ - else: excName = str(excClass) - raise self.failureException, "%s not raised" % excName - - def failUnlessEqual(self, first, second, msg=None): - """Fail if the two objects are unequal as determined by the '==' - operator. - """ - if not first == second: - raise self.failureException, \ - (msg or '%r != %r' % (first, second)) - - def failIfEqual(self, first, second, msg=None): - """Fail if the two objects are equal as determined by the '==' - operator. - """ - if first == second: - raise self.failureException, \ - (msg or '%r == %r' % (first, second)) - - def failUnlessAlmostEqual(self, first, second, places=7, msg=None): - """Fail if the two objects are unequal as determined by their - difference rounded to the given number of decimal places - (default 7) and comparing to zero. 
- - Note that decimal places (from zero) are usually not the same - as significant digits (measured from the most signficant digit). - """ - if round(abs(second-first), places) != 0: - raise self.failureException, \ - (msg or '%r != %r within %r places' % (first, second, places)) - - def failIfAlmostEqual(self, first, second, places=7, msg=None): - """Fail if the two objects are equal as determined by their - difference rounded to the given number of decimal places - (default 7) and comparing to zero. - - Note that decimal places (from zero) are usually not the same - as significant digits (measured from the most signficant digit). - """ - if round(abs(second-first), places) == 0: - raise self.failureException, \ - (msg or '%r == %r within %r places' % (first, second, places)) - - # Synonyms for assertion methods - - assertEqual = assertEquals = failUnlessEqual - - assertNotEqual = assertNotEquals = failIfEqual - - assertAlmostEqual = assertAlmostEquals = failUnlessAlmostEqual - - assertNotAlmostEqual = assertNotAlmostEquals = failIfAlmostEqual - - assertRaises = failUnlessRaises - - assert_ = assertTrue = failUnless - - assertFalse = failIf - - - -class TestSuite: - """A test suite is a composite test consisting of a number of TestCases. - - For use, create an instance of TestSuite, then add test case instances. - When all tests have been added, the suite can be passed to a test - runner, such as TextTestRunner. It will run the individual test cases - in the order in which they were added, aggregating the results. When - subclassing, do not forget to call the base class constructor. 
- """ - def __init__(self, tests=()): - self._tests = [] - self.addTests(tests) - - def __repr__(self): - return "<%s tests=%s>" % (_strclass(self.__class__), self._tests) - - __str__ = __repr__ - - def __eq__(self, other): - if type(self) is not type(other): - return False - return self._tests == other._tests - - def __ne__(self, other): - return not self == other - - # Can't guarantee hash invariant, so flag as unhashable - __hash__ = None - - def __iter__(self): - return iter(self._tests) - - def countTestCases(self): - cases = 0 - for test in self._tests: - cases += test.countTestCases() - return cases - - def addTest(self, test): - # sanity checks - if not hasattr(test, '__call__'): - raise TypeError("the test to add must be callable") - if (isinstance(test, (type, types.ClassType)) and - issubclass(test, (TestCase, TestSuite))): - raise TypeError("TestCases and TestSuites must be instantiated " - "before passing them to addTest()") - self._tests.append(test) - - def addTests(self, tests): - if isinstance(tests, basestring): - raise TypeError("tests must be an iterable of tests, not a string") - for test in tests: - self.addTest(test) - - def run(self, result): - for test in self._tests: - if result.shouldStop: - break - test(result) - return result - - def __call__(self, *args, **kwds): - return self.run(*args, **kwds) - - def debug(self): - """Run the tests without collecting errors in a TestResult""" - for test in self._tests: test.debug() - - -class FunctionTestCase(TestCase): - """A test case that wraps a test function. - - This is useful for slipping pre-existing test functions into the - unittest framework. Optionally, set-up and tidy-up functions can be - supplied. As with TestCase, the tidy-up ('tearDown') function will - always be called if the set-up ('setUp') function ran successfully. 
- """ - - def __init__(self, testFunc, setUp=None, tearDown=None, - description=None): - TestCase.__init__(self) - self.__setUpFunc = setUp - self.__tearDownFunc = tearDown - self.__testFunc = testFunc - self.__description = description - - def setUp(self): - if self.__setUpFunc is not None: - self.__setUpFunc() - - def tearDown(self): - if self.__tearDownFunc is not None: - self.__tearDownFunc() - - def runTest(self): - self.__testFunc() - - def id(self): - return self.__testFunc.__name__ - - def __eq__(self, other): - if type(self) is not type(other): - return False - - return self.__setUpFunc == other.__setUpFunc and \ - self.__tearDownFunc == other.__tearDownFunc and \ - self.__testFunc == other.__testFunc and \ - self.__description == other.__description - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((type(self), self.__setUpFunc, self.__tearDownFunc, - self.__testFunc, self.__description)) - - def __str__(self): - return "%s (%s)" % (_strclass(self.__class__), self.__testFunc.__name__) - - def __repr__(self): - return "<%s testFunc=%s>" % (_strclass(self.__class__), self.__testFunc) - - def shortDescription(self): - if self.__description is not None: return self.__description - doc = self.__testFunc.__doc__ - return doc and doc.split("\n")[0].strip() or None - - - -############################################################################## -# Locating and loading tests -############################################################################## - -class TestLoader: - """This class is responsible for loading tests according to various - criteria and returning them wrapped in a TestSuite - """ - testMethodPrefix = 'test' - sortTestMethodsUsing = cmp - suiteClass = TestSuite - - def loadTestsFromTestCase(self, testCaseClass): - """Return a suite of all tests cases contained in testCaseClass""" - if issubclass(testCaseClass, TestSuite): - raise TypeError("Test cases should not be derived from TestSuite. 
Maybe you meant to derive from TestCase?") - testCaseNames = self.getTestCaseNames(testCaseClass) - if not testCaseNames and hasattr(testCaseClass, 'runTest'): - testCaseNames = ['runTest'] - return self.suiteClass(map(testCaseClass, testCaseNames)) - - def loadTestsFromModule(self, module): - """Return a suite of all tests cases contained in the given module""" - tests = [] - for name in dir(module): - obj = getattr(module, name) - if (isinstance(obj, (type, types.ClassType)) and - issubclass(obj, TestCase)): - tests.append(self.loadTestsFromTestCase(obj)) - return self.suiteClass(tests) - - def loadTestsFromName(self, name, module=None): - """Return a suite of all tests cases given a string specifier. - - The name may resolve either to a module, a test case class, a - test method within a test case class, or a callable object which - returns a TestCase or TestSuite instance. - - The method optionally resolves the names relative to a given module. - """ - parts = name.split('.') - if module is None: - parts_copy = parts[:] - while parts_copy: - try: - module = __import__('.'.join(parts_copy)) - break - except ImportError: - del parts_copy[-1] - if not parts_copy: raise - parts = parts[1:] - obj = module - for part in parts: - parent, obj = obj, getattr(obj, part) - - if type(obj) == types.ModuleType: - return self.loadTestsFromModule(obj) - elif (isinstance(obj, (type, types.ClassType)) and - issubclass(obj, TestCase)): - return self.loadTestsFromTestCase(obj) - elif (type(obj) == types.UnboundMethodType and - isinstance(parent, (type, types.ClassType)) and - issubclass(parent, TestCase)): - return TestSuite([parent(obj.__name__)]) - elif isinstance(obj, TestSuite): - return obj - elif hasattr(obj, '__call__'): - test = obj() - if isinstance(test, TestSuite): - return test - elif isinstance(test, TestCase): - return TestSuite([test]) - else: - raise TypeError("calling %s returned %s, not a test" % - (obj, test)) - else: - raise TypeError("don't know how to make 
test from: %s" % obj) - - def loadTestsFromNames(self, names, module=None): - """Return a suite of all tests cases found using the given sequence - of string specifiers. See 'loadTestsFromName()'. - """ - suites = [self.loadTestsFromName(name, module) for name in names] - return self.suiteClass(suites) - - def getTestCaseNames(self, testCaseClass): - """Return a sorted sequence of method names found within testCaseClass - """ - def isTestMethod(attrname, testCaseClass=testCaseClass, prefix=self.testMethodPrefix): - return attrname.startswith(prefix) and hasattr(getattr(testCaseClass, attrname), '__call__') - testFnNames = filter(isTestMethod, dir(testCaseClass)) - if self.sortTestMethodsUsing: - testFnNames.sort(key=_CmpToKey(self.sortTestMethodsUsing)) - return testFnNames - - - -defaultTestLoader = TestLoader() - - -############################################################################## -# Patches for old functions: these functions should be considered obsolete -############################################################################## - -def _makeLoader(prefix, sortUsing, suiteClass=None): - loader = TestLoader() - loader.sortTestMethodsUsing = sortUsing - loader.testMethodPrefix = prefix - if suiteClass: loader.suiteClass = suiteClass - return loader - -def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp): - return _makeLoader(prefix, sortUsing).getTestCaseNames(testCaseClass) - -def makeSuite(testCaseClass, prefix='test', sortUsing=cmp, suiteClass=TestSuite): - return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromTestCase(testCaseClass) - -def findTestCases(module, prefix='test', sortUsing=cmp, suiteClass=TestSuite): - return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromModule(module) - - -############################################################################## -# Text UI -############################################################################## - -class _WritelnDecorator: - """Used to decorate file-like objects 
with a handy 'writeln' method""" - def __init__(self,stream): - self.stream = stream - - def __getattr__(self, attr): - return getattr(self.stream,attr) - - def writeln(self, arg=None): - if arg: self.write(arg) - self.write('\n') # text-mode streams translate to \r\n if needed - - -class _TextTestResult(TestResult): - """A test result class that can print formatted text results to a stream. - - Used by TextTestRunner. - """ - separator1 = '=' * 70 - separator2 = '-' * 70 - - def __init__(self, stream, descriptions, verbosity): - TestResult.__init__(self) - self.stream = stream - self.showAll = verbosity > 1 - self.dots = verbosity == 1 - self.descriptions = descriptions - - def getDescription(self, test): - if self.descriptions: - return test.shortDescription() or str(test) - else: - return str(test) - - def startTest(self, test): - TestResult.startTest(self, test) - if self.showAll: - self.stream.write(self.getDescription(test)) - self.stream.write(" ... ") - self.stream.flush() - - def addSuccess(self, test): - TestResult.addSuccess(self, test) - if self.showAll: - self.stream.writeln("ok") - elif self.dots: - self.stream.write('.') - self.stream.flush() - - def addError(self, test, err): - TestResult.addError(self, test, err) - if self.showAll: - self.stream.writeln("ERROR") - elif self.dots: - self.stream.write('E') - self.stream.flush() - - def addFailure(self, test, err): - TestResult.addFailure(self, test, err) - if self.showAll: - self.stream.writeln("FAIL") - elif self.dots: - self.stream.write('F') - self.stream.flush() - - def printErrors(self): - if self.dots or self.showAll: - self.stream.writeln() - self.printErrorList('ERROR', self.errors) - self.printErrorList('FAIL', self.failures) - - def printErrorList(self, flavour, errors): - for test, err in errors: - self.stream.writeln(self.separator1) - self.stream.writeln("%s: %s" % (flavour,self.getDescription(test))) - self.stream.writeln(self.separator2) - self.stream.writeln("%s" % err) - - -class 
TextTestRunner: - """A test runner class that displays results in textual form. - - It prints out the names of tests as they are run, errors as they - occur, and a summary of the results at the end of the test run. - """ - def __init__(self, stream=sys.stderr, descriptions=1, verbosity=1): - self.stream = _WritelnDecorator(stream) - self.descriptions = descriptions - self.verbosity = verbosity - - def _makeResult(self): - return _TextTestResult(self.stream, self.descriptions, self.verbosity) - - def run(self, test): - "Run the given test case or test suite." - result = self._makeResult() - startTime = time.time() - test(result) - stopTime = time.time() - timeTaken = stopTime - startTime - result.printErrors() - self.stream.writeln(result.separator2) - run = result.testsRun - self.stream.writeln("Ran %d test%s in %.3fs" % - (run, run != 1 and "s" or "", timeTaken)) - self.stream.writeln() - if not result.wasSuccessful(): - self.stream.write("FAILED (") - failed, errored = map(len, (result.failures, result.errors)) - if failed: - self.stream.write("failures=%d" % failed) - if errored: - if failed: self.stream.write(", ") - self.stream.write("errors=%d" % errored) - self.stream.writeln(")") - else: - self.stream.writeln("OK") - return result - - - -############################################################################## -# Facilities for running tests from the command line -############################################################################## - -class TestProgram: - """A command-line program that runs a set of tests; this is primarily - for making test modules conveniently executable. - """ - USAGE = """\ -Usage: %(progName)s [options] [test] [...] 
- -Options: - -h, --help Show this message - -v, --verbose Verbose output - -q, --quiet Minimal output - -Examples: - %(progName)s - run default set of tests - %(progName)s MyTestSuite - run suite 'MyTestSuite' - %(progName)s MyTestCase.testSomething - run MyTestCase.testSomething - %(progName)s MyTestCase - run all 'test*' test methods - in MyTestCase -""" - def __init__(self, module='__main__', defaultTest=None, - argv=None, testRunner=TextTestRunner, - testLoader=defaultTestLoader): - if type(module) == type(''): - self.module = __import__(module) - for part in module.split('.')[1:]: - self.module = getattr(self.module, part) - else: - self.module = module - if argv is None: - argv = sys.argv - self.verbosity = 1 - self.defaultTest = defaultTest - self.testRunner = testRunner - self.testLoader = testLoader - self.progName = os.path.basename(argv[0]) - self.parseArgs(argv) - self.runTests() - - def usageExit(self, msg=None): - if msg: print msg - print self.USAGE % self.__dict__ - sys.exit(2) - - def parseArgs(self, argv): - import getopt - try: - options, args = getopt.getopt(argv[1:], 'hHvq', - ['help','verbose','quiet']) - for opt, value in options: - if opt in ('-h','-H','--help'): - self.usageExit() - if opt in ('-q','--quiet'): - self.verbosity = 0 - if opt in ('-v','--verbose'): - self.verbosity = 2 - if len(args) == 0 and self.defaultTest is None: - self.test = self.testLoader.loadTestsFromModule(self.module) - return - if len(args) > 0: - self.testNames = args - else: - self.testNames = (self.defaultTest,) - self.createTests() - except getopt.error, msg: - self.usageExit(msg) - - def createTests(self): - self.test = self.testLoader.loadTestsFromNames(self.testNames, - self.module) - - def runTests(self): - if isinstance(self.testRunner, (type, types.ClassType)): - try: - testRunner = self.testRunner(verbosity=self.verbosity) - except TypeError: - # didn't accept the verbosity argument - testRunner = self.testRunner() - else: - # it is assumed to be a 
TestRunner instance - testRunner = self.testRunner - result = testRunner.run(self.test) - sys.exit(not result.wasSuccessful()) - -main = TestProgram - - -############################################################################## -# Executing this module from the command line -############################################################################## - -if __name__ == "__main__": - main(module=None) diff --git a/python/Lib/unittest/__init__.py b/python/Lib/unittest/__init__.py deleted file mode 100644 index a5d50af78f..0000000000 --- a/python/Lib/unittest/__init__.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Python unit testing framework, based on Erich Gamma's JUnit and Kent Beck's -Smalltalk testing framework. - -This module contains the core framework classes that form the basis of -specific test cases and suites (TestCase, TestSuite etc.), and also a -text-based utility class for running the tests and reporting the results - (TextTestRunner). - -Simple usage: - - import unittest - - class IntegerArithmeticTestCase(unittest.TestCase): - def testAdd(self): ## test method names begin 'test*' - self.assertEqual((1 + 2), 3) - self.assertEqual(0 + 1, 1) - def testMultiply(self): - self.assertEqual((0 * 10), 0) - self.assertEqual((5 * 8), 40) - - if __name__ == '__main__': - unittest.main() - -Further information is available in the bundled documentation, and from - - http://docs.python.org/library/unittest.html - -Copyright (c) 1999-2003 Steve Purcell -Copyright (c) 2003-2010 Python Software Foundation -This module is free software, and you may redistribute it and/or modify -it under the same terms as Python itself, so long as this copyright message -and disclaimer are retained in their original form. - -IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, -SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF -THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGE. 
- -THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, -AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, -SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. -""" - -__all__ = ['TestResult', 'TestCase', 'TestSuite', - 'TextTestRunner', 'TestLoader', 'FunctionTestCase', 'main', - 'defaultTestLoader', 'SkipTest', 'skip', 'skipIf', 'skipUnless', - 'expectedFailure', 'TextTestResult', 'installHandler', - 'registerResult', 'removeResult', 'removeHandler'] - -# Expose obsolete functions for backwards compatibility -__all__.extend(['getTestCaseNames', 'makeSuite', 'findTestCases']) - -__unittest = True - -from .result import TestResult -from .case import (TestCase, FunctionTestCase, SkipTest, skip, skipIf, - skipUnless, expectedFailure) -from .suite import BaseTestSuite, TestSuite -from .loader import (TestLoader, defaultTestLoader, makeSuite, getTestCaseNames, - findTestCases) -from .main import TestProgram, main -from .runner import TextTestRunner, TextTestResult -from .signals import installHandler, registerResult, removeResult, removeHandler - -# deprecated -_TextTestResult = TextTestResult diff --git a/python/Lib/unittest/__main__.py b/python/Lib/unittest/__main__.py deleted file mode 100644 index 7320050ae9..0000000000 --- a/python/Lib/unittest/__main__.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Main entry point""" - -import sys -if sys.argv[0].endswith("__main__.py"): - sys.argv[0] = "python -m unittest" - -__unittest = True - -from .main import main, TestProgram, USAGE_AS_MAIN -TestProgram.USAGE = USAGE_AS_MAIN - -main(module=None) diff --git a/python/Lib/unittest/case.py b/python/Lib/unittest/case.py deleted file mode 100644 index 8f4610145a..0000000000 --- a/python/Lib/unittest/case.py +++ /dev/null @@ -1,1076 +0,0 @@ -"""Test case implementation""" - -import collections -import sys -import 
functools -import difflib -import pprint -import re -import types -import warnings - -from . import result -from .util import ( - strclass, safe_repr, unorderable_list_difference, - _count_diff_all_purpose, _count_diff_hashable -) - - -__unittest = True - - -DIFF_OMITTED = ('\nDiff is %s characters long. ' - 'Set self.maxDiff to None to see it.') - -class SkipTest(Exception): - """ - Raise this exception in a test to skip it. - - Usually you can use TestCase.skipTest() or one of the skipping decorators - instead of raising this directly. - """ - pass - -class _ExpectedFailure(Exception): - """ - Raise this when a test is expected to fail. - - This is an implementation detail. - """ - - def __init__(self, exc_info): - super(_ExpectedFailure, self).__init__() - self.exc_info = exc_info - -class _UnexpectedSuccess(Exception): - """ - The test was supposed to fail, but it didn't! - """ - pass - -def _id(obj): - return obj - -def skip(reason): - """ - Unconditionally skip a test. - """ - def decorator(test_item): - if not isinstance(test_item, (type, types.ClassType)): - @functools.wraps(test_item) - def skip_wrapper(*args, **kwargs): - raise SkipTest(reason) - test_item = skip_wrapper - - test_item.__unittest_skip__ = True - test_item.__unittest_skip_why__ = reason - return test_item - return decorator - -def skipIf(condition, reason): - """ - Skip a test if the condition is true. - """ - if condition: - return skip(reason) - return _id - -def skipUnless(condition, reason): - """ - Skip a test unless the condition is true. 
- """ - if not condition: - return skip(reason) - return _id - - -def expectedFailure(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - try: - func(*args, **kwargs) - except Exception: - raise _ExpectedFailure(sys.exc_info()) - raise _UnexpectedSuccess - return wrapper - - -class _AssertRaisesContext(object): - """A context manager used to implement TestCase.assertRaises* methods.""" - - def __init__(self, expected, test_case, expected_regexp=None): - self.expected = expected - self.failureException = test_case.failureException - self.expected_regexp = expected_regexp - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, tb): - if exc_type is None: - try: - exc_name = self.expected.__name__ - except AttributeError: - exc_name = str(self.expected) - raise self.failureException( - "{0} not raised".format(exc_name)) - if not issubclass(exc_type, self.expected): - # let unexpected exceptions pass through - return False - self.exception = exc_value # store for later retrieval - if self.expected_regexp is None: - return True - - expected_regexp = self.expected_regexp - if not expected_regexp.search(str(exc_value)): - raise self.failureException('"%s" does not match "%s"' % - (expected_regexp.pattern, str(exc_value))) - return True - - -class TestCase(object): - """A class whose instances are single test cases. - - By default, the test code itself should be placed in a method named - 'runTest'. - - If the fixture may be used for many test cases, create as - many test methods as are needed. When instantiating such a TestCase - subclass, specify in the constructor arguments the name of the test method - that the instance is to execute. - - Test authors should subclass TestCase for their own tests. Construction - and deconstruction of the test's environment ('fixture') can be - implemented by overriding the 'setUp' and 'tearDown' methods respectively. 
- - If it is necessary to override the __init__ method, the base class - __init__ method must always be called. It is important that subclasses - should not change the signature of their __init__ method, since instances - of the classes are instantiated automatically by parts of the framework - in order to be run. - - When subclassing TestCase, you can set these attributes: - * failureException: determines which exception will be raised when - the instance's assertion methods fail; test methods raising this - exception will be deemed to have 'failed' rather than 'errored'. - * longMessage: determines whether long messages (including repr of - objects used in assert methods) will be printed on failure in *addition* - to any explicit message passed. - * maxDiff: sets the maximum length of a diff in failure messages - by assert methods using difflib. It is looked up as an instance - attribute so can be configured by individual tests if required. - """ - - failureException = AssertionError - - longMessage = False - - maxDiff = 80*8 - - # If a string is longer than _diffThreshold, use normal comparison instead - # of difflib. See #11763. - _diffThreshold = 2**16 - - # Attribute used by TestSuite for classSetUp - - _classSetupFailed = False - - def __init__(self, methodName='runTest'): - """Create an instance of the class that will use the named test - method when executed. Raises a ValueError if the instance does - not have a method with the specified name. - """ - self._testMethodName = methodName - self._resultForDoCleanups = None - try: - testMethod = getattr(self, methodName) - except AttributeError: - raise ValueError("no such test method in %s: %s" % - (self.__class__, methodName)) - self._testMethodDoc = testMethod.__doc__ - self._cleanups = [] - - # Map types to custom assertEqual functions that will compare - # instances of said type in more detail to generate a more useful - # error message. 
- self._type_equality_funcs = {} - self.addTypeEqualityFunc(dict, 'assertDictEqual') - self.addTypeEqualityFunc(list, 'assertListEqual') - self.addTypeEqualityFunc(tuple, 'assertTupleEqual') - self.addTypeEqualityFunc(set, 'assertSetEqual') - self.addTypeEqualityFunc(frozenset, 'assertSetEqual') - try: - self.addTypeEqualityFunc(unicode, 'assertMultiLineEqual') - except NameError: - # No unicode support in this build - pass - - def addTypeEqualityFunc(self, typeobj, function): - """Add a type specific assertEqual style function to compare a type. - - This method is for use by TestCase subclasses that need to register - their own type equality functions to provide nicer error messages. - - Args: - typeobj: The data type to call this function on when both values - are of the same type in assertEqual(). - function: The callable taking two arguments and an optional - msg= argument that raises self.failureException with a - useful error message when the two arguments are not equal. - """ - self._type_equality_funcs[typeobj] = function - - def addCleanup(self, function, *args, **kwargs): - """Add a function, with arguments, to be called when the test is - completed. Functions added are called on a LIFO basis and are - called after tearDown on test failure or success. - - Cleanup items are called even if setUp fails (unlike tearDown).""" - self._cleanups.append((function, args, kwargs)) - - def setUp(self): - "Hook method for setting up the test fixture before exercising it." - pass - - def tearDown(self): - "Hook method for deconstructing the test fixture after testing it." - pass - - @classmethod - def setUpClass(cls): - "Hook method for setting up class fixture before running tests in the class." - - @classmethod - def tearDownClass(cls): - "Hook method for deconstructing the class fixture after running all tests in the class." 
- - def countTestCases(self): - return 1 - - def defaultTestResult(self): - return result.TestResult() - - def shortDescription(self): - """Returns a one-line description of the test, or None if no - description has been provided. - - The default implementation of this method returns the first line of - the specified test method's docstring. - """ - doc = self._testMethodDoc - return doc and doc.split("\n")[0].strip() or None - - - def id(self): - return "%s.%s" % (strclass(self.__class__), self._testMethodName) - - def __eq__(self, other): - if type(self) is not type(other): - return NotImplemented - - return self._testMethodName == other._testMethodName - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((type(self), self._testMethodName)) - - def __str__(self): - return "%s (%s)" % (self._testMethodName, strclass(self.__class__)) - - def __repr__(self): - return "<%s testMethod=%s>" % \ - (strclass(self.__class__), self._testMethodName) - - def _addSkip(self, result, reason): - addSkip = getattr(result, 'addSkip', None) - if addSkip is not None: - addSkip(self, reason) - else: - warnings.warn("TestResult has no addSkip method, skips not reported", - RuntimeWarning, 2) - result.addSuccess(self) - - def run(self, result=None): - orig_result = result - if result is None: - result = self.defaultTestResult() - startTestRun = getattr(result, 'startTestRun', None) - if startTestRun is not None: - startTestRun() - - self._resultForDoCleanups = result - result.startTest(self) - - testMethod = getattr(self, self._testMethodName) - if (getattr(self.__class__, "__unittest_skip__", False) or - getattr(testMethod, "__unittest_skip__", False)): - # If the class or method was skipped. 
- try: - skip_why = (getattr(self.__class__, '__unittest_skip_why__', '') - or getattr(testMethod, '__unittest_skip_why__', '')) - self._addSkip(result, skip_why) - finally: - result.stopTest(self) - return - try: - success = False - try: - self.setUp() - except SkipTest as e: - self._addSkip(result, str(e)) - except KeyboardInterrupt: - raise - except: - result.addError(self, sys.exc_info()) - else: - try: - testMethod() - except KeyboardInterrupt: - raise - except self.failureException: - result.addFailure(self, sys.exc_info()) - except _ExpectedFailure as e: - addExpectedFailure = getattr(result, 'addExpectedFailure', None) - if addExpectedFailure is not None: - addExpectedFailure(self, e.exc_info) - else: - warnings.warn("TestResult has no addExpectedFailure method, reporting as passes", - RuntimeWarning) - result.addSuccess(self) - except _UnexpectedSuccess: - addUnexpectedSuccess = getattr(result, 'addUnexpectedSuccess', None) - if addUnexpectedSuccess is not None: - addUnexpectedSuccess(self) - else: - warnings.warn("TestResult has no addUnexpectedSuccess method, reporting as failures", - RuntimeWarning) - result.addFailure(self, sys.exc_info()) - except SkipTest as e: - self._addSkip(result, str(e)) - except: - result.addError(self, sys.exc_info()) - else: - success = True - - try: - self.tearDown() - except KeyboardInterrupt: - raise - except: - result.addError(self, sys.exc_info()) - success = False - - cleanUpSuccess = self.doCleanups() - success = success and cleanUpSuccess - if success: - result.addSuccess(self) - finally: - result.stopTest(self) - if orig_result is None: - stopTestRun = getattr(result, 'stopTestRun', None) - if stopTestRun is not None: - stopTestRun() - - def doCleanups(self): - """Execute all cleanup functions. 
Normally called for you after - tearDown.""" - result = self._resultForDoCleanups - ok = True - while self._cleanups: - function, args, kwargs = self._cleanups.pop(-1) - try: - function(*args, **kwargs) - except KeyboardInterrupt: - raise - except: - ok = False - result.addError(self, sys.exc_info()) - return ok - - def __call__(self, *args, **kwds): - return self.run(*args, **kwds) - - def debug(self): - """Run the test without collecting errors in a TestResult""" - self.setUp() - getattr(self, self._testMethodName)() - self.tearDown() - while self._cleanups: - function, args, kwargs = self._cleanups.pop(-1) - function(*args, **kwargs) - - def skipTest(self, reason): - """Skip this test.""" - raise SkipTest(reason) - - def fail(self, msg=None): - """Fail immediately, with the given message.""" - raise self.failureException(msg) - - def assertFalse(self, expr, msg=None): - """Check that the expression is false.""" - if expr: - msg = self._formatMessage(msg, "%s is not false" % safe_repr(expr)) - raise self.failureException(msg) - - def assertTrue(self, expr, msg=None): - """Check that the expression is true.""" - if not expr: - msg = self._formatMessage(msg, "%s is not true" % safe_repr(expr)) - raise self.failureException(msg) - - def _formatMessage(self, msg, standardMsg): - """Honour the longMessage attribute when generating failure messages. 
- If longMessage is False this means: - * Use only an explicit message if it is provided - * Otherwise use the standard message for the assert - - If longMessage is True: - * Use the standard message - * If an explicit message is provided, plus ' : ' and the explicit message - """ - if not self.longMessage: - return msg or standardMsg - if msg is None: - return standardMsg - try: - # don't switch to '{}' formatting in Python 2.X - # it changes the way unicode input is handled - return '%s : %s' % (standardMsg, msg) - except UnicodeDecodeError: - return '%s : %s' % (safe_repr(standardMsg), safe_repr(msg)) - - - def assertRaises(self, excClass, callableObj=None, *args, **kwargs): - """Fail unless an exception of class excClass is raised - by callableObj when invoked with arguments args and keyword - arguments kwargs. If a different type of exception is - raised, it will not be caught, and the test case will be - deemed to have suffered an error, exactly as for an - unexpected exception. - - If called with callableObj omitted or None, will return a - context object used like this:: - - with self.assertRaises(SomeException): - do_something() - - The context manager keeps a reference to the exception as - the 'exception' attribute. This allows you to inspect the - exception after the assertion:: - - with self.assertRaises(SomeException) as cm: - do_something() - the_exception = cm.exception - self.assertEqual(the_exception.error_code, 3) - """ - context = _AssertRaisesContext(excClass, self) - if callableObj is None: - return context - with context: - callableObj(*args, **kwargs) - - def _getAssertEqualityFunc(self, first, second): - """Get a detailed comparison function for the types of the two args. - - Returns: A callable accepting (first, second, msg=None) that will - raise a failure exception if first != second with a useful human - readable error message for those types. 
- """ - # - # NOTE(gregory.p.smith): I considered isinstance(first, type(second)) - # and vice versa. I opted for the conservative approach in case - # subclasses are not intended to be compared in detail to their super - # class instances using a type equality func. This means testing - # subtypes won't automagically use the detailed comparison. Callers - # should use their type specific assertSpamEqual method to compare - # subclasses if the detailed comparison is desired and appropriate. - # See the discussion in http://bugs.python.org/issue2578. - # - if type(first) is type(second): - asserter = self._type_equality_funcs.get(type(first)) - if asserter is not None: - if isinstance(asserter, basestring): - asserter = getattr(self, asserter) - return asserter - - return self._baseAssertEqual - - def _baseAssertEqual(self, first, second, msg=None): - """The default assertEqual implementation, not type specific.""" - if not first == second: - standardMsg = '%s != %s' % (safe_repr(first), safe_repr(second)) - msg = self._formatMessage(msg, standardMsg) - raise self.failureException(msg) - - def assertEqual(self, first, second, msg=None): - """Fail if the two objects are unequal as determined by the '==' - operator. - """ - assertion_func = self._getAssertEqualityFunc(first, second) - assertion_func(first, second, msg=msg) - - def assertNotEqual(self, first, second, msg=None): - """Fail if the two objects are equal as determined by the '!=' - operator. - """ - if not first != second: - msg = self._formatMessage(msg, '%s == %s' % (safe_repr(first), - safe_repr(second))) - raise self.failureException(msg) - - - def assertAlmostEqual(self, first, second, places=None, msg=None, delta=None): - """Fail if the two objects are unequal as determined by their - difference rounded to the given number of decimal places - (default 7) and comparing to zero, or by comparing that the - between the two objects is more than the given delta. 
- - Note that decimal places (from zero) are usually not the same - as significant digits (measured from the most significant digit). - - If the two objects compare equal then they will automatically - compare almost equal. - """ - if first == second: - # shortcut - return - if delta is not None and places is not None: - raise TypeError("specify delta or places not both") - - if delta is not None: - if abs(first - second) <= delta: - return - - standardMsg = '%s != %s within %s delta' % (safe_repr(first), - safe_repr(second), - safe_repr(delta)) - else: - if places is None: - places = 7 - - if round(abs(second-first), places) == 0: - return - - standardMsg = '%s != %s within %r places' % (safe_repr(first), - safe_repr(second), - places) - msg = self._formatMessage(msg, standardMsg) - raise self.failureException(msg) - - def assertNotAlmostEqual(self, first, second, places=None, msg=None, delta=None): - """Fail if the two objects are equal as determined by their - difference rounded to the given number of decimal places - (default 7) and comparing to zero, or by comparing that the - between the two objects is less than the given delta. - - Note that decimal places (from zero) are usually not the same - as significant digits (measured from the most significant digit). - - Objects that are equal automatically fail. 
- """ - if delta is not None and places is not None: - raise TypeError("specify delta or places not both") - if delta is not None: - if not (first == second) and abs(first - second) > delta: - return - standardMsg = '%s == %s within %s delta' % (safe_repr(first), - safe_repr(second), - safe_repr(delta)) - else: - if places is None: - places = 7 - if not (first == second) and round(abs(second-first), places) != 0: - return - standardMsg = '%s == %s within %r places' % (safe_repr(first), - safe_repr(second), - places) - - msg = self._formatMessage(msg, standardMsg) - raise self.failureException(msg) - - # Synonyms for assertion methods - - # The plurals are undocumented. Keep them that way to discourage use. - # Do not add more. Do not remove. - # Going through a deprecation cycle on these would annoy many people. - assertEquals = assertEqual - assertNotEquals = assertNotEqual - assertAlmostEquals = assertAlmostEqual - assertNotAlmostEquals = assertNotAlmostEqual - assert_ = assertTrue - - # These fail* assertion method names are pending deprecation and will - # be a DeprecationWarning in 3.2; http://bugs.python.org/issue2578 - def _deprecate(original_func): - def deprecated_func(*args, **kwargs): - warnings.warn( - 'Please use {0} instead.'.format(original_func.__name__), - PendingDeprecationWarning, 2) - return original_func(*args, **kwargs) - return deprecated_func - - failUnlessEqual = _deprecate(assertEqual) - failIfEqual = _deprecate(assertNotEqual) - failUnlessAlmostEqual = _deprecate(assertAlmostEqual) - failIfAlmostEqual = _deprecate(assertNotAlmostEqual) - failUnless = _deprecate(assertTrue) - failUnlessRaises = _deprecate(assertRaises) - failIf = _deprecate(assertFalse) - - def assertSequenceEqual(self, seq1, seq2, msg=None, seq_type=None): - """An equality assertion for ordered sequences (like lists and tuples). - - For the purposes of this function, a valid ordered sequence type is one - which can be indexed, has a length, and has an equality operator. 
- - Args: - seq1: The first sequence to compare. - seq2: The second sequence to compare. - seq_type: The expected datatype of the sequences, or None if no - datatype should be enforced. - msg: Optional message to use on failure instead of a list of - differences. - """ - if seq_type is not None: - seq_type_name = seq_type.__name__ - if not isinstance(seq1, seq_type): - raise self.failureException('First sequence is not a %s: %s' - % (seq_type_name, safe_repr(seq1))) - if not isinstance(seq2, seq_type): - raise self.failureException('Second sequence is not a %s: %s' - % (seq_type_name, safe_repr(seq2))) - else: - seq_type_name = "sequence" - - differing = None - try: - len1 = len(seq1) - except (TypeError, NotImplementedError): - differing = 'First %s has no length. Non-sequence?' % ( - seq_type_name) - - if differing is None: - try: - len2 = len(seq2) - except (TypeError, NotImplementedError): - differing = 'Second %s has no length. Non-sequence?' % ( - seq_type_name) - - if differing is None: - if seq1 == seq2: - return - - seq1_repr = safe_repr(seq1) - seq2_repr = safe_repr(seq2) - if len(seq1_repr) > 30: - seq1_repr = seq1_repr[:30] + '...' - if len(seq2_repr) > 30: - seq2_repr = seq2_repr[:30] + '...' - elements = (seq_type_name.capitalize(), seq1_repr, seq2_repr) - differing = '%ss differ: %s != %s\n' % elements - - for i in xrange(min(len1, len2)): - try: - item1 = seq1[i] - except (TypeError, IndexError, NotImplementedError): - differing += ('\nUnable to index element %d of first %s\n' % - (i, seq_type_name)) - break - - try: - item2 = seq2[i] - except (TypeError, IndexError, NotImplementedError): - differing += ('\nUnable to index element %d of second %s\n' % - (i, seq_type_name)) - break - - if item1 != item2: - differing += ('\nFirst differing element %d:\n%s\n%s\n' % - (i, safe_repr(item1), safe_repr(item2))) - break - else: - if (len1 == len2 and seq_type is None and - type(seq1) != type(seq2)): - # The sequences are the same, but have differing types. 
- return - - if len1 > len2: - differing += ('\nFirst %s contains %d additional ' - 'elements.\n' % (seq_type_name, len1 - len2)) - try: - differing += ('First extra element %d:\n%s\n' % - (len2, safe_repr(seq1[len2]))) - except (TypeError, IndexError, NotImplementedError): - differing += ('Unable to index element %d ' - 'of first %s\n' % (len2, seq_type_name)) - elif len1 < len2: - differing += ('\nSecond %s contains %d additional ' - 'elements.\n' % (seq_type_name, len2 - len1)) - try: - differing += ('First extra element %d:\n%s\n' % - (len1, safe_repr(seq2[len1]))) - except (TypeError, IndexError, NotImplementedError): - differing += ('Unable to index element %d ' - 'of second %s\n' % (len1, seq_type_name)) - standardMsg = differing - diffMsg = '\n' + '\n'.join( - difflib.ndiff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines())) - standardMsg = self._truncateMessage(standardMsg, diffMsg) - msg = self._formatMessage(msg, standardMsg) - self.fail(msg) - - def _truncateMessage(self, message, diff): - max_diff = self.maxDiff - if max_diff is None or len(diff) <= max_diff: - return message + diff - return message + (DIFF_OMITTED % len(diff)) - - def assertListEqual(self, list1, list2, msg=None): - """A list-specific equality assertion. - - Args: - list1: The first list to compare. - list2: The second list to compare. - msg: Optional message to use on failure instead of a list of - differences. - - """ - self.assertSequenceEqual(list1, list2, msg, seq_type=list) - - def assertTupleEqual(self, tuple1, tuple2, msg=None): - """A tuple-specific equality assertion. - - Args: - tuple1: The first tuple to compare. - tuple2: The second tuple to compare. - msg: Optional message to use on failure instead of a list of - differences. - """ - self.assertSequenceEqual(tuple1, tuple2, msg, seq_type=tuple) - - def assertSetEqual(self, set1, set2, msg=None): - """A set-specific equality assertion. - - Args: - set1: The first set to compare. 
- set2: The second set to compare. - msg: Optional message to use on failure instead of a list of - differences. - - assertSetEqual uses ducktyping to support different types of sets, and - is optimized for sets specifically (parameters must support a - difference method). - """ - try: - difference1 = set1.difference(set2) - except TypeError, e: - self.fail('invalid type when attempting set difference: %s' % e) - except AttributeError, e: - self.fail('first argument does not support set difference: %s' % e) - - try: - difference2 = set2.difference(set1) - except TypeError, e: - self.fail('invalid type when attempting set difference: %s' % e) - except AttributeError, e: - self.fail('second argument does not support set difference: %s' % e) - - if not (difference1 or difference2): - return - - lines = [] - if difference1: - lines.append('Items in the first set but not the second:') - for item in difference1: - lines.append(repr(item)) - if difference2: - lines.append('Items in the second set but not the first:') - for item in difference2: - lines.append(repr(item)) - - standardMsg = '\n'.join(lines) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertIn(self, member, container, msg=None): - """Just like self.assertTrue(a in b), but with a nicer default message.""" - if member not in container: - standardMsg = '%s not found in %s' % (safe_repr(member), - safe_repr(container)) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertNotIn(self, member, container, msg=None): - """Just like self.assertTrue(a not in b), but with a nicer default message.""" - if member in container: - standardMsg = '%s unexpectedly found in %s' % (safe_repr(member), - safe_repr(container)) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertIs(self, expr1, expr2, msg=None): - """Just like self.assertTrue(a is b), but with a nicer default message.""" - if expr1 is not expr2: - standardMsg = '%s is not %s' % (safe_repr(expr1), - safe_repr(expr2)) - 
self.fail(self._formatMessage(msg, standardMsg)) - - def assertIsNot(self, expr1, expr2, msg=None): - """Just like self.assertTrue(a is not b), but with a nicer default message.""" - if expr1 is expr2: - standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertDictEqual(self, d1, d2, msg=None): - self.assertIsInstance(d1, dict, 'First argument is not a dictionary') - self.assertIsInstance(d2, dict, 'Second argument is not a dictionary') - - if d1 != d2: - standardMsg = '%s != %s' % (safe_repr(d1, True), safe_repr(d2, True)) - diff = ('\n' + '\n'.join(difflib.ndiff( - pprint.pformat(d1).splitlines(), - pprint.pformat(d2).splitlines()))) - standardMsg = self._truncateMessage(standardMsg, diff) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertDictContainsSubset(self, expected, actual, msg=None): - """Checks whether actual is a superset of expected.""" - missing = [] - mismatched = [] - for key, value in expected.iteritems(): - if key not in actual: - missing.append(key) - elif value != actual[key]: - mismatched.append('%s, expected: %s, actual: %s' % - (safe_repr(key), safe_repr(value), - safe_repr(actual[key]))) - - if not (missing or mismatched): - return - - standardMsg = '' - if missing: - standardMsg = 'Missing: %s' % ','.join(safe_repr(m) for m in - missing) - if mismatched: - if standardMsg: - standardMsg += '; ' - standardMsg += 'Mismatched values: %s' % ','.join(mismatched) - - self.fail(self._formatMessage(msg, standardMsg)) - - def assertItemsEqual(self, expected_seq, actual_seq, msg=None): - """An unordered sequence specific comparison. It asserts that - actual_seq and expected_seq have the same element counts. - Equivalent to:: - - self.assertEqual(Counter(iter(actual_seq)), - Counter(iter(expected_seq))) - - Asserts that each element has the same count in both sequences. - Example: - - [0, 1, 1] and [1, 0, 1] compare equal. - - [0, 0, 1] and [0, 1] compare unequal. 
- """ - first_seq, second_seq = list(expected_seq), list(actual_seq) - with warnings.catch_warnings(): - if sys.py3kwarning: - # Silence Py3k warning raised during the sorting - for _msg in ["(code|dict|type) inequality comparisons", - "builtin_function_or_method order comparisons", - "comparing unequal types"]: - warnings.filterwarnings("ignore", _msg, DeprecationWarning) - try: - first = collections.Counter(first_seq) - second = collections.Counter(second_seq) - except TypeError: - # Handle case with unhashable elements - differences = _count_diff_all_purpose(first_seq, second_seq) - else: - if first == second: - return - differences = _count_diff_hashable(first_seq, second_seq) - - if differences: - standardMsg = 'Element counts were not equal:\n' - lines = ['First has %d, Second has %d: %r' % diff for diff in differences] - diffMsg = '\n'.join(lines) - standardMsg = self._truncateMessage(standardMsg, diffMsg) - msg = self._formatMessage(msg, standardMsg) - self.fail(msg) - - def assertMultiLineEqual(self, first, second, msg=None): - """Assert that two multi-line strings are equal.""" - self.assertIsInstance(first, basestring, - 'First argument is not a string') - self.assertIsInstance(second, basestring, - 'Second argument is not a string') - - if first != second: - # don't use difflib if the strings are too long - if (len(first) > self._diffThreshold or - len(second) > self._diffThreshold): - self._baseAssertEqual(first, second, msg) - firstlines = first.splitlines(True) - secondlines = second.splitlines(True) - if len(firstlines) == 1 and first.strip('\r\n') == first: - firstlines = [first + '\n'] - secondlines = [second + '\n'] - standardMsg = '%s != %s' % (safe_repr(first, True), - safe_repr(second, True)) - diff = '\n' + ''.join(difflib.ndiff(firstlines, secondlines)) - standardMsg = self._truncateMessage(standardMsg, diff) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertLess(self, a, b, msg=None): - """Just like self.assertTrue(a < b), 
but with a nicer default message.""" - if not a < b: - standardMsg = '%s not less than %s' % (safe_repr(a), safe_repr(b)) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertLessEqual(self, a, b, msg=None): - """Just like self.assertTrue(a <= b), but with a nicer default message.""" - if not a <= b: - standardMsg = '%s not less than or equal to %s' % (safe_repr(a), safe_repr(b)) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertGreater(self, a, b, msg=None): - """Just like self.assertTrue(a > b), but with a nicer default message.""" - if not a > b: - standardMsg = '%s not greater than %s' % (safe_repr(a), safe_repr(b)) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertGreaterEqual(self, a, b, msg=None): - """Just like self.assertTrue(a >= b), but with a nicer default message.""" - if not a >= b: - standardMsg = '%s not greater than or equal to %s' % (safe_repr(a), safe_repr(b)) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertIsNone(self, obj, msg=None): - """Same as self.assertTrue(obj is None), with a nicer default message.""" - if obj is not None: - standardMsg = '%s is not None' % (safe_repr(obj),) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertIsNotNone(self, obj, msg=None): - """Included for symmetry with assertIsNone.""" - if obj is None: - standardMsg = 'unexpectedly None' - self.fail(self._formatMessage(msg, standardMsg)) - - def assertIsInstance(self, obj, cls, msg=None): - """Same as self.assertTrue(isinstance(obj, cls)), with a nicer - default message.""" - if not isinstance(obj, cls): - standardMsg = '%s is not an instance of %r' % (safe_repr(obj), cls) - self.fail(self._formatMessage(msg, standardMsg)) - - def assertNotIsInstance(self, obj, cls, msg=None): - """Included for symmetry with assertIsInstance.""" - if isinstance(obj, cls): - standardMsg = '%s is an instance of %r' % (safe_repr(obj), cls) - self.fail(self._formatMessage(msg, standardMsg)) - - def 
assertRaisesRegexp(self, expected_exception, expected_regexp, - callable_obj=None, *args, **kwargs): - """Asserts that the message in a raised exception matches a regexp. - - Args: - expected_exception: Exception class expected to be raised. - expected_regexp: Regexp (re pattern object or string) expected - to be found in error message. - callable_obj: Function to be called. - args: Extra args. - kwargs: Extra kwargs. - """ - if expected_regexp is not None: - expected_regexp = re.compile(expected_regexp) - context = _AssertRaisesContext(expected_exception, self, expected_regexp) - if callable_obj is None: - return context - with context: - callable_obj(*args, **kwargs) - - def assertRegexpMatches(self, text, expected_regexp, msg=None): - """Fail the test unless the text matches the regular expression.""" - if isinstance(expected_regexp, basestring): - expected_regexp = re.compile(expected_regexp) - if not expected_regexp.search(text): - msg = msg or "Regexp didn't match" - msg = '%s: %r not found in %r' % (msg, expected_regexp.pattern, text) - raise self.failureException(msg) - - def assertNotRegexpMatches(self, text, unexpected_regexp, msg=None): - """Fail the test if the text matches the regular expression.""" - if isinstance(unexpected_regexp, basestring): - unexpected_regexp = re.compile(unexpected_regexp) - match = unexpected_regexp.search(text) - if match: - msg = msg or "Regexp matched" - msg = '%s: %r matches %r in %r' % (msg, - text[match.start():match.end()], - unexpected_regexp.pattern, - text) - raise self.failureException(msg) - - -class FunctionTestCase(TestCase): - """A test case that wraps a test function. - - This is useful for slipping pre-existing test functions into the - unittest framework. Optionally, set-up and tidy-up functions can be - supplied. As with TestCase, the tidy-up ('tearDown') function will - always be called if the set-up ('setUp') function ran successfully. 
- """ - - def __init__(self, testFunc, setUp=None, tearDown=None, description=None): - super(FunctionTestCase, self).__init__() - self._setUpFunc = setUp - self._tearDownFunc = tearDown - self._testFunc = testFunc - self._description = description - - def setUp(self): - if self._setUpFunc is not None: - self._setUpFunc() - - def tearDown(self): - if self._tearDownFunc is not None: - self._tearDownFunc() - - def runTest(self): - self._testFunc() - - def id(self): - return self._testFunc.__name__ - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - - return self._setUpFunc == other._setUpFunc and \ - self._tearDownFunc == other._tearDownFunc and \ - self._testFunc == other._testFunc and \ - self._description == other._description - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((type(self), self._setUpFunc, self._tearDownFunc, - self._testFunc, self._description)) - - def __str__(self): - return "%s (%s)" % (strclass(self.__class__), - self._testFunc.__name__) - - def __repr__(self): - return "<%s tec=%s>" % (strclass(self.__class__), - self._testFunc) - - def shortDescription(self): - if self._description is not None: - return self._description - doc = self._testFunc.__doc__ - return doc and doc.split("\n")[0].strip() or None diff --git a/python/Lib/unittest/loader.py b/python/Lib/unittest/loader.py deleted file mode 100644 index 9163a1a00d..0000000000 --- a/python/Lib/unittest/loader.py +++ /dev/null @@ -1,316 +0,0 @@ -"""Loading unittests.""" - -import os -import re -import sys -import traceback -import types - -from functools import cmp_to_key as _CmpToKey -from fnmatch import fnmatch - -from . 
import case, suite - -__unittest = True - -# what about .pyc or .pyo (etc) -# we would need to avoid loading the same tests multiple times -# from '.py', '.pyc' *and* '.pyo' -VALID_MODULE_NAME = re.compile(r'[_a-z]\w*\.py$', re.IGNORECASE) - - -def _make_failed_import_test(name, suiteClass): - message = 'Failed to import test module: %s\n%s' % (name, traceback.format_exc()) - return _make_failed_test('ModuleImportFailure', name, ImportError(message), - suiteClass) - -def _make_failed_load_tests(name, exception, suiteClass): - return _make_failed_test('LoadTestsFailure', name, exception, suiteClass) - -def _make_failed_test(classname, methodname, exception, suiteClass): - def testFailure(self): - raise exception - attrs = {methodname: testFailure} - TestClass = type(classname, (case.TestCase,), attrs) - return suiteClass((TestClass(methodname),)) - - -class TestLoader(object): - """ - This class is responsible for loading tests according to various criteria - and returning them wrapped in a TestSuite - """ - testMethodPrefix = 'test' - sortTestMethodsUsing = cmp - suiteClass = suite.TestSuite - _top_level_dir = None - - def loadTestsFromTestCase(self, testCaseClass): - """Return a suite of all tests cases contained in testCaseClass""" - if issubclass(testCaseClass, suite.TestSuite): - raise TypeError("Test cases should not be derived from TestSuite." 
\ - " Maybe you meant to derive from TestCase?") - testCaseNames = self.getTestCaseNames(testCaseClass) - if not testCaseNames and hasattr(testCaseClass, 'runTest'): - testCaseNames = ['runTest'] - loaded_suite = self.suiteClass(map(testCaseClass, testCaseNames)) - return loaded_suite - - def loadTestsFromModule(self, module, use_load_tests=True): - """Return a suite of all tests cases contained in the given module""" - tests = [] - for name in dir(module): - obj = getattr(module, name) - if isinstance(obj, type) and issubclass(obj, case.TestCase): - tests.append(self.loadTestsFromTestCase(obj)) - - load_tests = getattr(module, 'load_tests', None) - tests = self.suiteClass(tests) - if use_load_tests and load_tests is not None: - try: - return load_tests(self, tests, None) - except Exception, e: - return _make_failed_load_tests(module.__name__, e, - self.suiteClass) - return tests - - def loadTestsFromName(self, name, module=None): - """Return a suite of all tests cases given a string specifier. - - The name may resolve either to a module, a test case class, a - test method within a test case class, or a callable object which - returns a TestCase or TestSuite instance. - - The method optionally resolves the names relative to a given module. 
- """ - parts = name.split('.') - if module is None: - parts_copy = parts[:] - while parts_copy: - try: - module = __import__('.'.join(parts_copy)) - break - except ImportError: - del parts_copy[-1] - if not parts_copy: - raise - parts = parts[1:] - obj = module - for part in parts: - parent, obj = obj, getattr(obj, part) - - if isinstance(obj, types.ModuleType): - return self.loadTestsFromModule(obj) - elif isinstance(obj, type) and issubclass(obj, case.TestCase): - return self.loadTestsFromTestCase(obj) - elif (isinstance(obj, types.UnboundMethodType) and - isinstance(parent, type) and - issubclass(parent, case.TestCase)): - name = parts[-1] - inst = parent(name) - return self.suiteClass([inst]) - elif isinstance(obj, suite.TestSuite): - return obj - elif hasattr(obj, '__call__'): - test = obj() - if isinstance(test, suite.TestSuite): - return test - elif isinstance(test, case.TestCase): - return self.suiteClass([test]) - else: - raise TypeError("calling %s returned %s, not a test" % - (obj, test)) - else: - raise TypeError("don't know how to make test from: %s" % obj) - - def loadTestsFromNames(self, names, module=None): - """Return a suite of all tests cases found using the given sequence - of string specifiers. See 'loadTestsFromName()'. 
- """ - suites = [self.loadTestsFromName(name, module) for name in names] - return self.suiteClass(suites) - - def getTestCaseNames(self, testCaseClass): - """Return a sorted sequence of method names found within testCaseClass - """ - def isTestMethod(attrname, testCaseClass=testCaseClass, - prefix=self.testMethodPrefix): - return attrname.startswith(prefix) and \ - hasattr(getattr(testCaseClass, attrname), '__call__') - testFnNames = filter(isTestMethod, dir(testCaseClass)) - if self.sortTestMethodsUsing: - testFnNames.sort(key=_CmpToKey(self.sortTestMethodsUsing)) - return testFnNames - - def discover(self, start_dir, pattern='test*.py', top_level_dir=None): - """Find and return all test modules from the specified start - directory, recursing into subdirectories to find them. Only test files - that match the pattern will be loaded. (Using shell style pattern - matching.) - - All test modules must be importable from the top level of the project. - If the start directory is not the top level directory then the top - level directory must be specified separately. - - If a test package name (directory with '__init__.py') matches the - pattern then the package will be checked for a 'load_tests' function. If - this exists then it will be called with loader, tests, pattern. - - If load_tests exists then discovery does *not* recurse into the package, - load_tests is responsible for loading all tests in the package. - - The pattern is deliberately not stored as a loader attribute so that - packages can continue discovery themselves. top_level_dir is stored so - load_tests does not need to pass this argument in to loader.discover(). 
- """ - set_implicit_top = False - if top_level_dir is None and self._top_level_dir is not None: - # make top_level_dir optional if called from load_tests in a package - top_level_dir = self._top_level_dir - elif top_level_dir is None: - set_implicit_top = True - top_level_dir = start_dir - - top_level_dir = os.path.abspath(top_level_dir) - - if not top_level_dir in sys.path: - # all test modules must be importable from the top level directory - # should we *unconditionally* put the start directory in first - # in sys.path to minimise likelihood of conflicts between installed - # modules and development versions? - sys.path.insert(0, top_level_dir) - self._top_level_dir = top_level_dir - - is_not_importable = False - if os.path.isdir(os.path.abspath(start_dir)): - start_dir = os.path.abspath(start_dir) - if start_dir != top_level_dir: - is_not_importable = not os.path.isfile(os.path.join(start_dir, '__init__.py')) - else: - # support for discovery from dotted module names - try: - __import__(start_dir) - except ImportError: - is_not_importable = True - else: - the_module = sys.modules[start_dir] - top_part = start_dir.split('.')[0] - start_dir = os.path.abspath(os.path.dirname((the_module.__file__))) - if set_implicit_top: - self._top_level_dir = self._get_directory_containing_module(top_part) - sys.path.remove(top_level_dir) - - if is_not_importable: - raise ImportError('Start directory is not importable: %r' % start_dir) - - tests = list(self._find_tests(start_dir, pattern)) - return self.suiteClass(tests) - - def _get_directory_containing_module(self, module_name): - module = sys.modules[module_name] - full_path = os.path.abspath(module.__file__) - - if os.path.basename(full_path).lower().startswith('__init__.py'): - return os.path.dirname(os.path.dirname(full_path)) - else: - # here we have been given a module rather than a package - so - # all we can do is search the *same* directory the module is in - # should an exception be raised instead - return 
os.path.dirname(full_path) - - def _get_name_from_path(self, path): - path = os.path.splitext(os.path.normpath(path))[0] - - _relpath = os.path.relpath(path, self._top_level_dir) - assert not os.path.isabs(_relpath), "Path must be within the project" - assert not _relpath.startswith('..'), "Path must be within the project" - - name = _relpath.replace(os.path.sep, '.') - return name - - def _get_module_from_name(self, name): - __import__(name) - return sys.modules[name] - - def _match_path(self, path, full_path, pattern): - # override this method to use alternative matching strategy - return fnmatch(path, pattern) - - def _find_tests(self, start_dir, pattern): - """Used by discovery. Yields test suites it loads.""" - paths = os.listdir(start_dir) - - for path in paths: - full_path = os.path.join(start_dir, path) - if os.path.isfile(full_path): - if not VALID_MODULE_NAME.match(path): - # valid Python identifiers only - continue - if not self._match_path(path, full_path, pattern): - continue - # if the test file matches, load it - name = self._get_name_from_path(full_path) - try: - module = self._get_module_from_name(name) - except: - yield _make_failed_import_test(name, self.suiteClass) - else: - mod_file = os.path.abspath(getattr(module, '__file__', full_path)) - realpath = os.path.splitext(os.path.realpath(mod_file))[0] - fullpath_noext = os.path.splitext(os.path.realpath(full_path))[0] - if realpath.lower() != fullpath_noext.lower(): - module_dir = os.path.dirname(realpath) - mod_name = os.path.splitext(os.path.basename(full_path))[0] - expected_dir = os.path.dirname(full_path) - msg = ("%r module incorrectly imported from %r. Expected %r. 
" - "Is this module globally installed?") - raise ImportError(msg % (mod_name, module_dir, expected_dir)) - yield self.loadTestsFromModule(module) - elif os.path.isdir(full_path): - if not os.path.isfile(os.path.join(full_path, '__init__.py')): - continue - - load_tests = None - tests = None - if fnmatch(path, pattern): - # only check load_tests if the package directory itself matches the filter - name = self._get_name_from_path(full_path) - package = self._get_module_from_name(name) - load_tests = getattr(package, 'load_tests', None) - tests = self.loadTestsFromModule(package, use_load_tests=False) - - if load_tests is None: - if tests is not None: - # tests loaded from package file - yield tests - # recurse into the package - for test in self._find_tests(full_path, pattern): - yield test - else: - try: - yield load_tests(self, tests, pattern) - except Exception, e: - yield _make_failed_load_tests(package.__name__, e, - self.suiteClass) - -defaultTestLoader = TestLoader() - - -def _makeLoader(prefix, sortUsing, suiteClass=None): - loader = TestLoader() - loader.sortTestMethodsUsing = sortUsing - loader.testMethodPrefix = prefix - if suiteClass: - loader.suiteClass = suiteClass - return loader - -def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp): - return _makeLoader(prefix, sortUsing).getTestCaseNames(testCaseClass) - -def makeSuite(testCaseClass, prefix='test', sortUsing=cmp, - suiteClass=suite.TestSuite): - return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromTestCase(testCaseClass) - -def findTestCases(module, prefix='test', sortUsing=cmp, - suiteClass=suite.TestSuite): - return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromModule(module) diff --git a/python/Lib/unittest/main.py b/python/Lib/unittest/main.py deleted file mode 100644 index ca99ac6417..0000000000 --- a/python/Lib/unittest/main.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Unittest main program""" - -import sys -import os -import types - -from . 
import loader, runner -from .signals import installHandler - -__unittest = True - -FAILFAST = " -f, --failfast Stop on first failure\n" -CATCHBREAK = " -c, --catch Catch control-C and display results\n" -BUFFEROUTPUT = " -b, --buffer Buffer stdout and stderr during test runs\n" - -USAGE_AS_MAIN = """\ -Usage: %(progName)s [options] [tests] - -Options: - -h, --help Show this message - -v, --verbose Verbose output - -q, --quiet Minimal output -%(failfast)s%(catchbreak)s%(buffer)s -Examples: - %(progName)s test_module - run tests from test_module - %(progName)s module.TestClass - run tests from module.TestClass - %(progName)s module.Class.test_method - run specified test method - -[tests] can be a list of any number of test modules, classes and test -methods. - -Alternative Usage: %(progName)s discover [options] - -Options: - -v, --verbose Verbose output -%(failfast)s%(catchbreak)s%(buffer)s -s directory Directory to start discovery ('.' default) - -p pattern Pattern to match test files ('test*.py' default) - -t directory Top level directory of project (default to - start directory) - -For test discovery all test modules must be importable from the top -level directory of the project. -""" - -USAGE_FROM_MODULE = """\ -Usage: %(progName)s [options] [test] [...] - -Options: - -h, --help Show this message - -v, --verbose Verbose output - -q, --quiet Minimal output -%(failfast)s%(catchbreak)s%(buffer)s -Examples: - %(progName)s - run default set of tests - %(progName)s MyTestSuite - run suite 'MyTestSuite' - %(progName)s MyTestCase.testSomething - run MyTestCase.testSomething - %(progName)s MyTestCase - run all 'test*' test methods - in MyTestCase -""" - - - -class TestProgram(object): - """A command-line program that runs a set of tests; this is primarily - for making test modules conveniently executable. 
- """ - USAGE = USAGE_FROM_MODULE - - # defaults for testing - failfast = catchbreak = buffer = progName = None - - def __init__(self, module='__main__', defaultTest=None, argv=None, - testRunner=None, testLoader=loader.defaultTestLoader, - exit=True, verbosity=1, failfast=None, catchbreak=None, - buffer=None): - if isinstance(module, basestring): - self.module = __import__(module) - for part in module.split('.')[1:]: - self.module = getattr(self.module, part) - else: - self.module = module - if argv is None: - argv = sys.argv - - self.exit = exit - self.failfast = failfast - self.catchbreak = catchbreak - self.verbosity = verbosity - self.buffer = buffer - self.defaultTest = defaultTest - self.testRunner = testRunner - self.testLoader = testLoader - self.progName = os.path.basename(argv[0]) - self.parseArgs(argv) - self.runTests() - - def usageExit(self, msg=None): - if msg: - print msg - usage = {'progName': self.progName, 'catchbreak': '', 'failfast': '', - 'buffer': ''} - if self.failfast != False: - usage['failfast'] = FAILFAST - if self.catchbreak != False: - usage['catchbreak'] = CATCHBREAK - if self.buffer != False: - usage['buffer'] = BUFFEROUTPUT - print self.USAGE % usage - sys.exit(2) - - def parseArgs(self, argv): - if len(argv) > 1 and argv[1].lower() == 'discover': - self._do_discovery(argv[2:]) - return - - import getopt - long_opts = ['help', 'verbose', 'quiet', 'failfast', 'catch', 'buffer'] - try: - options, args = getopt.getopt(argv[1:], 'hHvqfcb', long_opts) - for opt, value in options: - if opt in ('-h','-H','--help'): - self.usageExit() - if opt in ('-q','--quiet'): - self.verbosity = 0 - if opt in ('-v','--verbose'): - self.verbosity = 2 - if opt in ('-f','--failfast'): - if self.failfast is None: - self.failfast = True - # Should this raise an exception if -f is not valid? - if opt in ('-c','--catch'): - if self.catchbreak is None: - self.catchbreak = True - # Should this raise an exception if -c is not valid? 
- if opt in ('-b','--buffer'): - if self.buffer is None: - self.buffer = True - # Should this raise an exception if -b is not valid? - if len(args) == 0 and self.defaultTest is None: - # createTests will load tests from self.module - self.testNames = None - elif len(args) > 0: - self.testNames = args - if __name__ == '__main__': - # to support python -m unittest ... - self.module = None - else: - self.testNames = (self.defaultTest,) - self.createTests() - except getopt.error, msg: - self.usageExit(msg) - - def createTests(self): - if self.testNames is None: - self.test = self.testLoader.loadTestsFromModule(self.module) - else: - self.test = self.testLoader.loadTestsFromNames(self.testNames, - self.module) - - def _do_discovery(self, argv, Loader=None): - if Loader is None: - Loader = lambda: self.testLoader - - # handle command line args for test discovery - self.progName = '%s discover' % self.progName - import optparse - parser = optparse.OptionParser() - parser.prog = self.progName - parser.add_option('-v', '--verbose', dest='verbose', default=False, - help='Verbose output', action='store_true') - if self.failfast != False: - parser.add_option('-f', '--failfast', dest='failfast', default=False, - help='Stop on first fail or error', - action='store_true') - if self.catchbreak != False: - parser.add_option('-c', '--catch', dest='catchbreak', default=False, - help='Catch Ctrl-C and display results so far', - action='store_true') - if self.buffer != False: - parser.add_option('-b', '--buffer', dest='buffer', default=False, - help='Buffer stdout and stderr during tests', - action='store_true') - parser.add_option('-s', '--start-directory', dest='start', default='.', - help="Directory to start discovery ('.' 
default)") - parser.add_option('-p', '--pattern', dest='pattern', default='test*.py', - help="Pattern to match tests ('test*.py' default)") - parser.add_option('-t', '--top-level-directory', dest='top', default=None, - help='Top level directory of project (defaults to start directory)') - - options, args = parser.parse_args(argv) - if len(args) > 3: - self.usageExit() - - for name, value in zip(('start', 'pattern', 'top'), args): - setattr(options, name, value) - - # only set options from the parsing here - # if they weren't set explicitly in the constructor - if self.failfast is None: - self.failfast = options.failfast - if self.catchbreak is None: - self.catchbreak = options.catchbreak - if self.buffer is None: - self.buffer = options.buffer - - if options.verbose: - self.verbosity = 2 - - start_dir = options.start - pattern = options.pattern - top_level_dir = options.top - - loader = Loader() - self.test = loader.discover(start_dir, pattern, top_level_dir) - - def runTests(self): - if self.catchbreak: - installHandler() - if self.testRunner is None: - self.testRunner = runner.TextTestRunner - if isinstance(self.testRunner, (type, types.ClassType)): - try: - testRunner = self.testRunner(verbosity=self.verbosity, - failfast=self.failfast, - buffer=self.buffer) - except TypeError: - # didn't accept the verbosity, buffer or failfast arguments - testRunner = self.testRunner() - else: - # it is assumed to be a TestRunner instance - testRunner = self.testRunner - self.result = testRunner.run(self.test) - if self.exit: - sys.exit(not self.result.wasSuccessful()) - -main = TestProgram diff --git a/python/Lib/unittest/result.py b/python/Lib/unittest/result.py deleted file mode 100644 index 8af972fac7..0000000000 --- a/python/Lib/unittest/result.py +++ /dev/null @@ -1,193 +0,0 @@ -"""Test result object""" - -import os -import sys -import traceback - -from StringIO import StringIO - -from . 
import util -from functools import wraps - -__unittest = True - -def failfast(method): - @wraps(method) - def inner(self, *args, **kw): - if getattr(self, 'failfast', False): - self.stop() - return method(self, *args, **kw) - return inner - -STDOUT_LINE = '\nStdout:\n%s' -STDERR_LINE = '\nStderr:\n%s' - - -class TestResult(object): - """Holder for test result information. - - Test results are automatically managed by the TestCase and TestSuite - classes, and do not need to be explicitly manipulated by writers of tests. - - Each instance holds the total number of tests run, and collections of - failures and errors that occurred among those test runs. The collections - contain tuples of (testcase, exceptioninfo), where exceptioninfo is the - formatted traceback of the error that occurred. - """ - _previousTestClass = None - _testRunEntered = False - _moduleSetUpFailed = False - def __init__(self, stream=None, descriptions=None, verbosity=None): - self.failfast = False - self.failures = [] - self.errors = [] - self.testsRun = 0 - self.skipped = [] - self.expectedFailures = [] - self.unexpectedSuccesses = [] - self.shouldStop = False - self.buffer = False - self._stdout_buffer = None - self._stderr_buffer = None - self._original_stdout = sys.stdout - self._original_stderr = sys.stderr - self._mirrorOutput = False - - def printErrors(self): - "Called by TestRunner after test run" - - def startTest(self, test): - "Called when the given test is about to be run" - self.testsRun += 1 - self._mirrorOutput = False - self._setupStdout() - - def _setupStdout(self): - if self.buffer: - if self._stderr_buffer is None: - self._stderr_buffer = StringIO() - self._stdout_buffer = StringIO() - sys.stdout = self._stdout_buffer - sys.stderr = self._stderr_buffer - - def startTestRun(self): - """Called once before any tests are executed. - - See startTest for a method called before each test. 
- """ - - def stopTest(self, test): - """Called when the given test has been run""" - self._restoreStdout() - self._mirrorOutput = False - - def _restoreStdout(self): - if self.buffer: - if self._mirrorOutput: - output = sys.stdout.getvalue() - error = sys.stderr.getvalue() - if output: - if not output.endswith('\n'): - output += '\n' - self._original_stdout.write(STDOUT_LINE % output) - if error: - if not error.endswith('\n'): - error += '\n' - self._original_stderr.write(STDERR_LINE % error) - - sys.stdout = self._original_stdout - sys.stderr = self._original_stderr - self._stdout_buffer.seek(0) - self._stdout_buffer.truncate() - self._stderr_buffer.seek(0) - self._stderr_buffer.truncate() - - def stopTestRun(self): - """Called once after all tests are executed. - - See stopTest for a method called after each test. - """ - - @failfast - def addError(self, test, err): - """Called when an error has occurred. 'err' is a tuple of values as - returned by sys.exc_info(). - """ - self.errors.append((test, self._exc_info_to_string(err, test))) - self._mirrorOutput = True - - @failfast - def addFailure(self, test, err): - """Called when an error has occurred. 
'err' is a tuple of values as - returned by sys.exc_info().""" - self.failures.append((test, self._exc_info_to_string(err, test))) - self._mirrorOutput = True - - def addSuccess(self, test): - "Called when a test has completed successfully" - pass - - def addSkip(self, test, reason): - """Called when a test is skipped.""" - self.skipped.append((test, reason)) - - def addExpectedFailure(self, test, err): - """Called when an expected failure/error occurred.""" - self.expectedFailures.append( - (test, self._exc_info_to_string(err, test))) - - @failfast - def addUnexpectedSuccess(self, test): - """Called when a test was expected to fail, but succeed.""" - self.unexpectedSuccesses.append(test) - - def wasSuccessful(self): - "Tells whether or not this result was a success" - return len(self.failures) == len(self.errors) == 0 - - def stop(self): - "Indicates that the tests should be aborted" - self.shouldStop = True - - def _exc_info_to_string(self, err, test): - """Converts a sys.exc_info()-style tuple of values into a string.""" - exctype, value, tb = err - # Skip test runner traceback levels - while tb and self._is_relevant_tb_level(tb): - tb = tb.tb_next - - if exctype is test.failureException: - # Skip assert*() traceback levels - length = self._count_relevant_tb_levels(tb) - msgLines = traceback.format_exception(exctype, value, tb, length) - else: - msgLines = traceback.format_exception(exctype, value, tb) - - if self.buffer: - output = sys.stdout.getvalue() - error = sys.stderr.getvalue() - if output: - if not output.endswith('\n'): - output += '\n' - msgLines.append(STDOUT_LINE % output) - if error: - if not error.endswith('\n'): - error += '\n' - msgLines.append(STDERR_LINE % error) - return ''.join(msgLines) - - - def _is_relevant_tb_level(self, tb): - return '__unittest' in tb.tb_frame.f_globals - - def _count_relevant_tb_levels(self, tb): - length = 0 - while tb and not self._is_relevant_tb_level(tb): - length += 1 - tb = tb.tb_next - return length - - def 
__repr__(self): - return ("<%s run=%i errors=%i failures=%i>" % - (util.strclass(self.__class__), self.testsRun, len(self.errors), - len(self.failures))) diff --git a/python/Lib/unittest/runner.py b/python/Lib/unittest/runner.py deleted file mode 100644 index 7632fe9823..0000000000 --- a/python/Lib/unittest/runner.py +++ /dev/null @@ -1,196 +0,0 @@ -"""Running tests""" - -import sys -import time - -from . import result -from .signals import registerResult - -__unittest = True - - -class _WritelnDecorator(object): - """Used to decorate file-like objects with a handy 'writeln' method""" - def __init__(self,stream): - self.stream = stream - - def __getattr__(self, attr): - if attr in ('stream', '__getstate__'): - raise AttributeError(attr) - return getattr(self.stream,attr) - - def writeln(self, arg=None): - if arg: - self.write(arg) - self.write('\n') # text-mode streams translate to \r\n if needed - - -class TextTestResult(result.TestResult): - """A test result class that can print formatted text results to a stream. - - Used by TextTestRunner. - """ - separator1 = '=' * 70 - separator2 = '-' * 70 - - def __init__(self, stream, descriptions, verbosity): - super(TextTestResult, self).__init__(stream, descriptions, verbosity) - self.stream = stream - self.showAll = verbosity > 1 - self.dots = verbosity == 1 - self.descriptions = descriptions - - def getDescription(self, test): - doc_first_line = test.shortDescription() - if self.descriptions and doc_first_line: - return '\n'.join((str(test), doc_first_line)) - else: - return str(test) - - def startTest(self, test): - super(TextTestResult, self).startTest(test) - if self.showAll: - self.stream.write(self.getDescription(test)) - self.stream.write(" ... 
") - self.stream.flush() - - def addSuccess(self, test): - super(TextTestResult, self).addSuccess(test) - if self.showAll: - self.stream.writeln("ok") - elif self.dots: - self.stream.write('.') - self.stream.flush() - - def addError(self, test, err): - super(TextTestResult, self).addError(test, err) - if self.showAll: - self.stream.writeln("ERROR") - elif self.dots: - self.stream.write('E') - self.stream.flush() - - def addFailure(self, test, err): - super(TextTestResult, self).addFailure(test, err) - if self.showAll: - self.stream.writeln("FAIL") - elif self.dots: - self.stream.write('F') - self.stream.flush() - - def addSkip(self, test, reason): - super(TextTestResult, self).addSkip(test, reason) - if self.showAll: - self.stream.writeln("skipped {0!r}".format(reason)) - elif self.dots: - self.stream.write("s") - self.stream.flush() - - def addExpectedFailure(self, test, err): - super(TextTestResult, self).addExpectedFailure(test, err) - if self.showAll: - self.stream.writeln("expected failure") - elif self.dots: - self.stream.write("x") - self.stream.flush() - - def addUnexpectedSuccess(self, test): - super(TextTestResult, self).addUnexpectedSuccess(test) - if self.showAll: - self.stream.writeln("unexpected success") - elif self.dots: - self.stream.write("u") - self.stream.flush() - - def printErrors(self): - if self.dots or self.showAll: - self.stream.writeln() - self.printErrorList('ERROR', self.errors) - self.printErrorList('FAIL', self.failures) - - def printErrorList(self, flavour, errors): - for test, err in errors: - self.stream.writeln(self.separator1) - self.stream.writeln("%s: %s" % (flavour,self.getDescription(test))) - self.stream.writeln(self.separator2) - self.stream.writeln("%s" % err) - - -class TextTestRunner(object): - """A test runner class that displays results in textual form. - - It prints out the names of tests as they are run, errors as they - occur, and a summary of the results at the end of the test run. 
- """ - resultclass = TextTestResult - - def __init__(self, stream=sys.stderr, descriptions=True, verbosity=1, - failfast=False, buffer=False, resultclass=None): - self.stream = _WritelnDecorator(stream) - self.descriptions = descriptions - self.verbosity = verbosity - self.failfast = failfast - self.buffer = buffer - if resultclass is not None: - self.resultclass = resultclass - - def _makeResult(self): - return self.resultclass(self.stream, self.descriptions, self.verbosity) - - def run(self, test): - "Run the given test case or test suite." - result = self._makeResult() - registerResult(result) - result.failfast = self.failfast - result.buffer = self.buffer - startTime = time.time() - startTestRun = getattr(result, 'startTestRun', None) - if startTestRun is not None: - startTestRun() - try: - test(result) - finally: - stopTestRun = getattr(result, 'stopTestRun', None) - if stopTestRun is not None: - stopTestRun() - stopTime = time.time() - timeTaken = stopTime - startTime - result.printErrors() - if hasattr(result, 'separator2'): - self.stream.writeln(result.separator2) - run = result.testsRun - self.stream.writeln("Ran %d test%s in %.3fs" % - (run, run != 1 and "s" or "", timeTaken)) - self.stream.writeln() - - expectedFails = unexpectedSuccesses = skipped = 0 - try: - results = map(len, (result.expectedFailures, - result.unexpectedSuccesses, - result.skipped)) - except AttributeError: - pass - else: - expectedFails, unexpectedSuccesses, skipped = results - - infos = [] - if not result.wasSuccessful(): - self.stream.write("FAILED") - failed, errored = map(len, (result.failures, result.errors)) - if failed: - infos.append("failures=%d" % failed) - if errored: - infos.append("errors=%d" % errored) - else: - self.stream.write("OK") - if skipped: - infos.append("skipped=%d" % skipped) - if expectedFails: - infos.append("expected failures=%d" % expectedFails) - if unexpectedSuccesses: - infos.append("unexpected successes=%d" % unexpectedSuccesses) - if infos: - 
self.stream.writeln(" (%s)" % (", ".join(infos),)) - else: - self.stream.write("\n") - return result diff --git a/python/Lib/unittest/signals.py b/python/Lib/unittest/signals.py deleted file mode 100644 index e6a5fc5243..0000000000 --- a/python/Lib/unittest/signals.py +++ /dev/null @@ -1,71 +0,0 @@ -import signal -import weakref - -from functools import wraps - -__unittest = True - - -class _InterruptHandler(object): - def __init__(self, default_handler): - self.called = False - self.original_handler = default_handler - if isinstance(default_handler, int): - if default_handler == signal.SIG_DFL: - # Pretend it's signal.default_int_handler instead. - default_handler = signal.default_int_handler - elif default_handler == signal.SIG_IGN: - # Not quite the same thing as SIG_IGN, but the closest we - # can make it: do nothing. - def default_handler(unused_signum, unused_frame): - pass - else: - raise TypeError("expected SIGINT signal handler to be " - "signal.SIG_IGN, signal.SIG_DFL, or a " - "callable object") - self.default_handler = default_handler - - def __call__(self, signum, frame): - installed_handler = signal.getsignal(signal.SIGINT) - if installed_handler is not self: - # if we aren't the installed handler, then delegate immediately - # to the default handler - self.default_handler(signum, frame) - - if self.called: - self.default_handler(signum, frame) - self.called = True - for result in _results.keys(): - result.stop() - -_results = weakref.WeakKeyDictionary() -def registerResult(result): - _results[result] = 1 - -def removeResult(result): - return bool(_results.pop(result, None)) - -_interrupt_handler = None -def installHandler(): - global _interrupt_handler - if _interrupt_handler is None: - default_handler = signal.getsignal(signal.SIGINT) - _interrupt_handler = _InterruptHandler(default_handler) - signal.signal(signal.SIGINT, _interrupt_handler) - - -def removeHandler(method=None): - if method is not None: - @wraps(method) - def inner(*args, **kwargs): 
- initial = signal.getsignal(signal.SIGINT) - removeHandler() - try: - return method(*args, **kwargs) - finally: - signal.signal(signal.SIGINT, initial) - return inner - - global _interrupt_handler - if _interrupt_handler is not None: - signal.signal(signal.SIGINT, _interrupt_handler.original_handler) diff --git a/python/Lib/unittest/suite.py b/python/Lib/unittest/suite.py deleted file mode 100644 index 633af5cb08..0000000000 --- a/python/Lib/unittest/suite.py +++ /dev/null @@ -1,303 +0,0 @@ -"""TestSuite""" - -import sys - -from . import case -from . import util - -__unittest = True - - -def _call_if_exists(parent, attr): - func = getattr(parent, attr, lambda: None) - func() - - -class BaseTestSuite(object): - """A simple test suite that doesn't provide class or module shared fixtures. - """ - def __init__(self, tests=()): - self._tests = [] - self.addTests(tests) - - def __repr__(self): - return "<%s tests=%s>" % (util.strclass(self.__class__), list(self)) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return list(self) == list(other) - - def __ne__(self, other): - return not self == other - - # Can't guarantee hash invariant, so flag as unhashable - __hash__ = None - - def __iter__(self): - return iter(self._tests) - - def countTestCases(self): - cases = 0 - for test in self: - cases += test.countTestCases() - return cases - - def addTest(self, test): - # sanity checks - if not hasattr(test, '__call__'): - raise TypeError("{} is not callable".format(repr(test))) - if isinstance(test, type) and issubclass(test, - (case.TestCase, TestSuite)): - raise TypeError("TestCases and TestSuites must be instantiated " - "before passing them to addTest()") - self._tests.append(test) - - def addTests(self, tests): - if isinstance(tests, basestring): - raise TypeError("tests must be an iterable of tests, not a string") - for test in tests: - self.addTest(test) - - def run(self, result): - for test in self: - if 
result.shouldStop: - break - test(result) - return result - - def __call__(self, *args, **kwds): - return self.run(*args, **kwds) - - def debug(self): - """Run the tests without collecting errors in a TestResult""" - for test in self: - test.debug() - - -class TestSuite(BaseTestSuite): - """A test suite is a composite test consisting of a number of TestCases. - - For use, create an instance of TestSuite, then add test case instances. - When all tests have been added, the suite can be passed to a test - runner, such as TextTestRunner. It will run the individual test cases - in the order in which they were added, aggregating the results. When - subclassing, do not forget to call the base class constructor. - """ - - def run(self, result, debug=False): - topLevel = False - if getattr(result, '_testRunEntered', False) is False: - result._testRunEntered = topLevel = True - - for test in self: - if result.shouldStop: - break - - if _isnotsuite(test): - self._tearDownPreviousClass(test, result) - self._handleModuleFixture(test, result) - self._handleClassSetUp(test, result) - result._previousTestClass = test.__class__ - - if (getattr(test.__class__, '_classSetupFailed', False) or - getattr(result, '_moduleSetUpFailed', False)): - continue - - if not debug: - test(result) - else: - test.debug() - - if topLevel: - self._tearDownPreviousClass(None, result) - self._handleModuleTearDown(result) - result._testRunEntered = False - return result - - def debug(self): - """Run the tests without collecting errors in a TestResult""" - debug = _DebugResult() - self.run(debug, True) - - ################################ - - def _handleClassSetUp(self, test, result): - previousClass = getattr(result, '_previousTestClass', None) - currentClass = test.__class__ - if currentClass == previousClass: - return - if result._moduleSetUpFailed: - return - if getattr(currentClass, "__unittest_skip__", False): - return - - try: - currentClass._classSetupFailed = False - except TypeError: - # test 
may actually be a function - # so its class will be a builtin-type - pass - - setUpClass = getattr(currentClass, 'setUpClass', None) - if setUpClass is not None: - _call_if_exists(result, '_setupStdout') - try: - setUpClass() - except Exception as e: - if isinstance(result, _DebugResult): - raise - currentClass._classSetupFailed = True - className = util.strclass(currentClass) - errorName = 'setUpClass (%s)' % className - self._addClassOrModuleLevelException(result, e, errorName) - finally: - _call_if_exists(result, '_restoreStdout') - - def _get_previous_module(self, result): - previousModule = None - previousClass = getattr(result, '_previousTestClass', None) - if previousClass is not None: - previousModule = previousClass.__module__ - return previousModule - - - def _handleModuleFixture(self, test, result): - previousModule = self._get_previous_module(result) - currentModule = test.__class__.__module__ - if currentModule == previousModule: - return - - self._handleModuleTearDown(result) - - result._moduleSetUpFailed = False - try: - module = sys.modules[currentModule] - except KeyError: - return - setUpModule = getattr(module, 'setUpModule', None) - if setUpModule is not None: - _call_if_exists(result, '_setupStdout') - try: - setUpModule() - except Exception, e: - if isinstance(result, _DebugResult): - raise - result._moduleSetUpFailed = True - errorName = 'setUpModule (%s)' % currentModule - self._addClassOrModuleLevelException(result, e, errorName) - finally: - _call_if_exists(result, '_restoreStdout') - - def _addClassOrModuleLevelException(self, result, exception, errorName): - error = _ErrorHolder(errorName) - addSkip = getattr(result, 'addSkip', None) - if addSkip is not None and isinstance(exception, case.SkipTest): - addSkip(error, str(exception)) - else: - result.addError(error, sys.exc_info()) - - def _handleModuleTearDown(self, result): - previousModule = self._get_previous_module(result) - if previousModule is None: - return - if 
result._moduleSetUpFailed: - return - - try: - module = sys.modules[previousModule] - except KeyError: - return - - tearDownModule = getattr(module, 'tearDownModule', None) - if tearDownModule is not None: - _call_if_exists(result, '_setupStdout') - try: - tearDownModule() - except Exception as e: - if isinstance(result, _DebugResult): - raise - errorName = 'tearDownModule (%s)' % previousModule - self._addClassOrModuleLevelException(result, e, errorName) - finally: - _call_if_exists(result, '_restoreStdout') - - def _tearDownPreviousClass(self, test, result): - previousClass = getattr(result, '_previousTestClass', None) - currentClass = test.__class__ - if currentClass == previousClass: - return - if getattr(previousClass, '_classSetupFailed', False): - return - if getattr(result, '_moduleSetUpFailed', False): - return - if getattr(previousClass, "__unittest_skip__", False): - return - - tearDownClass = getattr(previousClass, 'tearDownClass', None) - if tearDownClass is not None: - _call_if_exists(result, '_setupStdout') - try: - tearDownClass() - except Exception, e: - if isinstance(result, _DebugResult): - raise - className = util.strclass(previousClass) - errorName = 'tearDownClass (%s)' % className - self._addClassOrModuleLevelException(result, e, errorName) - finally: - _call_if_exists(result, '_restoreStdout') - - -class _ErrorHolder(object): - """ - Placeholder for a TestCase inside a result. As far as a TestResult - is concerned, this looks exactly like a unit test. Used to insert - arbitrary errors into a test suite run. 
- """ - # Inspired by the ErrorHolder from Twisted: - # http://twistedmatrix.com/trac/browser/trunk/twisted/trial/runner.py - - # attribute used by TestResult._exc_info_to_string - failureException = None - - def __init__(self, description): - self.description = description - - def id(self): - return self.description - - def shortDescription(self): - return None - - def __repr__(self): - return "" % (self.description,) - - def __str__(self): - return self.id() - - def run(self, result): - # could call result.addError(...) - but this test-like object - # shouldn't be run anyway - pass - - def __call__(self, result): - return self.run(result) - - def countTestCases(self): - return 0 - -def _isnotsuite(test): - "A crude way to tell apart testcases and suites with duck-typing" - try: - iter(test) - except TypeError: - return True - return False - - -class _DebugResult(object): - "Used by the TestSuite to hold previous class when running in debug." - _previousTestClass = None - _moduleSetUpFailed = False - shouldStop = False diff --git a/python/Lib/unittest/test/__init__.py b/python/Lib/unittest/test/__init__.py deleted file mode 100644 index 99b730b154..0000000000 --- a/python/Lib/unittest/test/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -import os -import sys -import unittest - - -here = os.path.dirname(__file__) -loader = unittest.defaultTestLoader - -def suite(): - suite = unittest.TestSuite() - for fn in os.listdir(here): - if fn.startswith("test") and fn.endswith(".py"): - modname = "unittest.test." 
+ fn[:-3] - __import__(modname) - module = sys.modules[modname] - suite.addTest(loader.loadTestsFromModule(module)) - return suite - - -if __name__ == "__main__": - unittest.main(defaultTest="suite") diff --git a/python/Lib/unittest/test/dummy.py b/python/Lib/unittest/test/dummy.py deleted file mode 100644 index e4f14e4035..0000000000 --- a/python/Lib/unittest/test/dummy.py +++ /dev/null @@ -1 +0,0 @@ -# Empty module for testing the loading of modules diff --git a/python/Lib/unittest/test/support.py b/python/Lib/unittest/test/support.py deleted file mode 100644 index f1cf03b142..0000000000 --- a/python/Lib/unittest/test/support.py +++ /dev/null @@ -1,119 +0,0 @@ -import unittest - - -class TestHashing(object): - """Used as a mixin for TestCase""" - - # Check for a valid __hash__ implementation - def test_hash(self): - for obj_1, obj_2 in self.eq_pairs: - try: - if not hash(obj_1) == hash(obj_2): - self.fail("%r and %r do not hash equal" % (obj_1, obj_2)) - except KeyboardInterrupt: - raise - except Exception, e: - self.fail("Problem hashing %r and %r: %s" % (obj_1, obj_2, e)) - - for obj_1, obj_2 in self.ne_pairs: - try: - if hash(obj_1) == hash(obj_2): - self.fail("%s and %s hash equal, but shouldn't" % - (obj_1, obj_2)) - except KeyboardInterrupt: - raise - except Exception, e: - self.fail("Problem hashing %s and %s: %s" % (obj_1, obj_2, e)) - - -class TestEquality(object): - """Used as a mixin for TestCase""" - - # Check for a valid __eq__ implementation - def test_eq(self): - for obj_1, obj_2 in self.eq_pairs: - self.assertEqual(obj_1, obj_2) - self.assertEqual(obj_2, obj_1) - - # Check for a valid __ne__ implementation - def test_ne(self): - for obj_1, obj_2 in self.ne_pairs: - self.assertNotEqual(obj_1, obj_2) - self.assertNotEqual(obj_2, obj_1) - - -class LoggingResult(unittest.TestResult): - def __init__(self, log): - self._events = log - super(LoggingResult, self).__init__() - - def startTest(self, test): - self._events.append('startTest') - 
super(LoggingResult, self).startTest(test) - - def startTestRun(self): - self._events.append('startTestRun') - super(LoggingResult, self).startTestRun() - - def stopTest(self, test): - self._events.append('stopTest') - super(LoggingResult, self).stopTest(test) - - def stopTestRun(self): - self._events.append('stopTestRun') - super(LoggingResult, self).stopTestRun() - - def addFailure(self, *args): - self._events.append('addFailure') - super(LoggingResult, self).addFailure(*args) - - def addSuccess(self, *args): - self._events.append('addSuccess') - super(LoggingResult, self).addSuccess(*args) - - def addError(self, *args): - self._events.append('addError') - super(LoggingResult, self).addError(*args) - - def addSkip(self, *args): - self._events.append('addSkip') - super(LoggingResult, self).addSkip(*args) - - def addExpectedFailure(self, *args): - self._events.append('addExpectedFailure') - super(LoggingResult, self).addExpectedFailure(*args) - - def addUnexpectedSuccess(self, *args): - self._events.append('addUnexpectedSuccess') - super(LoggingResult, self).addUnexpectedSuccess(*args) - - -class ResultWithNoStartTestRunStopTestRun(object): - """An object honouring TestResult before startTestRun/stopTestRun.""" - - def __init__(self): - self.failures = [] - self.errors = [] - self.testsRun = 0 - self.skipped = [] - self.expectedFailures = [] - self.unexpectedSuccesses = [] - self.shouldStop = False - - def startTest(self, test): - pass - - def stopTest(self, test): - pass - - def addError(self, test): - pass - - def addFailure(self, test): - pass - - def addSuccess(self, test): - pass - - def wasSuccessful(self): - return True diff --git a/python/Lib/unittest/test/test_assertions.py b/python/Lib/unittest/test/test_assertions.py deleted file mode 100644 index e8f0f64b4c..0000000000 --- a/python/Lib/unittest/test/test_assertions.py +++ /dev/null @@ -1,290 +0,0 @@ -import datetime - -import unittest - - -class Test_Assertions(unittest.TestCase): - def 
test_AlmostEqual(self): - self.assertAlmostEqual(1.00000001, 1.0) - self.assertNotAlmostEqual(1.0000001, 1.0) - self.assertRaises(self.failureException, - self.assertAlmostEqual, 1.0000001, 1.0) - self.assertRaises(self.failureException, - self.assertNotAlmostEqual, 1.00000001, 1.0) - - self.assertAlmostEqual(1.1, 1.0, places=0) - self.assertRaises(self.failureException, - self.assertAlmostEqual, 1.1, 1.0, places=1) - - self.assertAlmostEqual(0, .1+.1j, places=0) - self.assertNotAlmostEqual(0, .1+.1j, places=1) - self.assertRaises(self.failureException, - self.assertAlmostEqual, 0, .1+.1j, places=1) - self.assertRaises(self.failureException, - self.assertNotAlmostEqual, 0, .1+.1j, places=0) - - self.assertAlmostEqual(float('inf'), float('inf')) - self.assertRaises(self.failureException, self.assertNotAlmostEqual, - float('inf'), float('inf')) - - def test_AmostEqualWithDelta(self): - self.assertAlmostEqual(1.1, 1.0, delta=0.5) - self.assertAlmostEqual(1.0, 1.1, delta=0.5) - self.assertNotAlmostEqual(1.1, 1.0, delta=0.05) - self.assertNotAlmostEqual(1.0, 1.1, delta=0.05) - - self.assertAlmostEqual(1.0, 1.0, delta=0.5) - self.assertRaises(self.failureException, self.assertNotAlmostEqual, - 1.0, 1.0, delta=0.5) - - self.assertRaises(self.failureException, self.assertAlmostEqual, - 1.1, 1.0, delta=0.05) - self.assertRaises(self.failureException, self.assertNotAlmostEqual, - 1.1, 1.0, delta=0.5) - - self.assertRaises(TypeError, self.assertAlmostEqual, - 1.1, 1.0, places=2, delta=2) - self.assertRaises(TypeError, self.assertNotAlmostEqual, - 1.1, 1.0, places=2, delta=2) - - first = datetime.datetime.now() - second = first + datetime.timedelta(seconds=10) - self.assertAlmostEqual(first, second, - delta=datetime.timedelta(seconds=20)) - self.assertNotAlmostEqual(first, second, - delta=datetime.timedelta(seconds=5)) - - def test_assertRaises(self): - def _raise(e): - raise e - self.assertRaises(KeyError, _raise, KeyError) - self.assertRaises(KeyError, _raise, 
KeyError("key")) - try: - self.assertRaises(KeyError, lambda: None) - except self.failureException as e: - self.assertIn("KeyError not raised", e.args) - else: - self.fail("assertRaises() didn't fail") - try: - self.assertRaises(KeyError, _raise, ValueError) - except ValueError: - pass - else: - self.fail("assertRaises() didn't let exception pass through") - with self.assertRaises(KeyError) as cm: - try: - raise KeyError - except Exception, e: - raise - self.assertIs(cm.exception, e) - - with self.assertRaises(KeyError): - raise KeyError("key") - try: - with self.assertRaises(KeyError): - pass - except self.failureException as e: - self.assertIn("KeyError not raised", e.args) - else: - self.fail("assertRaises() didn't fail") - try: - with self.assertRaises(KeyError): - raise ValueError - except ValueError: - pass - else: - self.fail("assertRaises() didn't let exception pass through") - - def testAssertNotRegexpMatches(self): - self.assertNotRegexpMatches('Ala ma kota', r'r+') - try: - self.assertNotRegexpMatches('Ala ma kota', r'k.t', 'Message') - except self.failureException, e: - self.assertIn("'kot'", e.args[0]) - self.assertIn('Message', e.args[0]) - else: - self.fail('assertNotRegexpMatches should have failed.') - - -class TestLongMessage(unittest.TestCase): - """Test that the individual asserts honour longMessage. 
- This actually tests all the message behaviour for - asserts that use longMessage.""" - - def setUp(self): - class TestableTestFalse(unittest.TestCase): - longMessage = False - failureException = self.failureException - - def testTest(self): - pass - - class TestableTestTrue(unittest.TestCase): - longMessage = True - failureException = self.failureException - - def testTest(self): - pass - - self.testableTrue = TestableTestTrue('testTest') - self.testableFalse = TestableTestFalse('testTest') - - def testDefault(self): - self.assertFalse(unittest.TestCase.longMessage) - - def test_formatMsg(self): - self.assertEqual(self.testableFalse._formatMessage(None, "foo"), "foo") - self.assertEqual(self.testableFalse._formatMessage("foo", "bar"), "foo") - - self.assertEqual(self.testableTrue._formatMessage(None, "foo"), "foo") - self.assertEqual(self.testableTrue._formatMessage("foo", "bar"), "bar : foo") - - # This blows up if _formatMessage uses string concatenation - self.testableTrue._formatMessage(object(), 'foo') - - def test_formatMessage_unicode_error(self): - one = ''.join(chr(i) for i in range(255)) - # this used to cause a UnicodeDecodeError constructing msg - self.testableTrue._formatMessage(one, u'\uFFFD') - - def assertMessages(self, methodName, args, errors): - def getMethod(i): - useTestableFalse = i < 2 - if useTestableFalse: - test = self.testableFalse - else: - test = self.testableTrue - return getattr(test, methodName) - - for i, expected_regexp in enumerate(errors): - testMethod = getMethod(i) - kwargs = {} - withMsg = i % 2 - if withMsg: - kwargs = {"msg": "oops"} - - with self.assertRaisesRegexp(self.failureException, - expected_regexp=expected_regexp): - testMethod(*args, **kwargs) - - def testAssertTrue(self): - self.assertMessages('assertTrue', (False,), - ["^False is not true$", "^oops$", "^False is not true$", - "^False is not true : oops$"]) - - def testAssertFalse(self): - self.assertMessages('assertFalse', (True,), - ["^True is not false$", 
"^oops$", "^True is not false$", - "^True is not false : oops$"]) - - def testNotEqual(self): - self.assertMessages('assertNotEqual', (1, 1), - ["^1 == 1$", "^oops$", "^1 == 1$", - "^1 == 1 : oops$"]) - - def testAlmostEqual(self): - self.assertMessages('assertAlmostEqual', (1, 2), - ["^1 != 2 within 7 places$", "^oops$", - "^1 != 2 within 7 places$", "^1 != 2 within 7 places : oops$"]) - - def testNotAlmostEqual(self): - self.assertMessages('assertNotAlmostEqual', (1, 1), - ["^1 == 1 within 7 places$", "^oops$", - "^1 == 1 within 7 places$", "^1 == 1 within 7 places : oops$"]) - - def test_baseAssertEqual(self): - self.assertMessages('_baseAssertEqual', (1, 2), - ["^1 != 2$", "^oops$", "^1 != 2$", "^1 != 2 : oops$"]) - - def testAssertSequenceEqual(self): - # Error messages are multiline so not testing on full message - # assertTupleEqual and assertListEqual delegate to this method - self.assertMessages('assertSequenceEqual', ([], [None]), - ["\+ \[None\]$", "^oops$", r"\+ \[None\]$", - r"\+ \[None\] : oops$"]) - - def testAssertSetEqual(self): - self.assertMessages('assertSetEqual', (set(), set([None])), - ["None$", "^oops$", "None$", - "None : oops$"]) - - def testAssertIn(self): - self.assertMessages('assertIn', (None, []), - ['^None not found in \[\]$', "^oops$", - '^None not found in \[\]$', - '^None not found in \[\] : oops$']) - - def testAssertNotIn(self): - self.assertMessages('assertNotIn', (None, [None]), - ['^None unexpectedly found in \[None\]$', "^oops$", - '^None unexpectedly found in \[None\]$', - '^None unexpectedly found in \[None\] : oops$']) - - def testAssertDictEqual(self): - self.assertMessages('assertDictEqual', ({}, {'key': 'value'}), - [r"\+ \{'key': 'value'\}$", "^oops$", - "\+ \{'key': 'value'\}$", - "\+ \{'key': 'value'\} : oops$"]) - - def testAssertDictContainsSubset(self): - self.assertMessages('assertDictContainsSubset', ({'key': 'value'}, {}), - ["^Missing: 'key'$", "^oops$", - "^Missing: 'key'$", - "^Missing: 'key' : oops$"]) - - 
def testAssertMultiLineEqual(self): - self.assertMessages('assertMultiLineEqual', ("", "foo"), - [r"\+ foo$", "^oops$", - r"\+ foo$", - r"\+ foo : oops$"]) - - def testAssertLess(self): - self.assertMessages('assertLess', (2, 1), - ["^2 not less than 1$", "^oops$", - "^2 not less than 1$", "^2 not less than 1 : oops$"]) - - def testAssertLessEqual(self): - self.assertMessages('assertLessEqual', (2, 1), - ["^2 not less than or equal to 1$", "^oops$", - "^2 not less than or equal to 1$", - "^2 not less than or equal to 1 : oops$"]) - - def testAssertGreater(self): - self.assertMessages('assertGreater', (1, 2), - ["^1 not greater than 2$", "^oops$", - "^1 not greater than 2$", - "^1 not greater than 2 : oops$"]) - - def testAssertGreaterEqual(self): - self.assertMessages('assertGreaterEqual', (1, 2), - ["^1 not greater than or equal to 2$", "^oops$", - "^1 not greater than or equal to 2$", - "^1 not greater than or equal to 2 : oops$"]) - - def testAssertIsNone(self): - self.assertMessages('assertIsNone', ('not None',), - ["^'not None' is not None$", "^oops$", - "^'not None' is not None$", - "^'not None' is not None : oops$"]) - - def testAssertIsNotNone(self): - self.assertMessages('assertIsNotNone', (None,), - ["^unexpectedly None$", "^oops$", - "^unexpectedly None$", - "^unexpectedly None : oops$"]) - - def testAssertIs(self): - self.assertMessages('assertIs', (None, 'foo'), - ["^None is not 'foo'$", "^oops$", - "^None is not 'foo'$", - "^None is not 'foo' : oops$"]) - - def testAssertIsNot(self): - self.assertMessages('assertIsNot', (None, None), - ["^unexpectedly identical: None$", "^oops$", - "^unexpectedly identical: None$", - "^unexpectedly identical: None : oops$"]) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_break.py b/python/Lib/unittest/test/test_break.py deleted file mode 100644 index dab91c135c..0000000000 --- a/python/Lib/unittest/test/test_break.py +++ /dev/null @@ -1,284 +0,0 @@ -import gc -import os 
-import sys -import signal -import weakref - -from cStringIO import StringIO - - -import unittest - - -@unittest.skipUnless(hasattr(os, 'kill'), "Test requires os.kill") -@unittest.skipIf(sys.platform =="win32", "Test cannot run on Windows") -@unittest.skipIf(sys.platform == 'freebsd6', "Test kills regrtest on freebsd6 " - "if threads have been used") -class TestBreak(unittest.TestCase): - int_handler = None - - def setUp(self): - self._default_handler = signal.getsignal(signal.SIGINT) - if self.int_handler is not None: - signal.signal(signal.SIGINT, self.int_handler) - - def tearDown(self): - signal.signal(signal.SIGINT, self._default_handler) - unittest.signals._results = weakref.WeakKeyDictionary() - unittest.signals._interrupt_handler = None - - - def testInstallHandler(self): - default_handler = signal.getsignal(signal.SIGINT) - unittest.installHandler() - self.assertNotEqual(signal.getsignal(signal.SIGINT), default_handler) - - try: - pid = os.getpid() - os.kill(pid, signal.SIGINT) - except KeyboardInterrupt: - self.fail("KeyboardInterrupt not handled") - - self.assertTrue(unittest.signals._interrupt_handler.called) - - def testRegisterResult(self): - result = unittest.TestResult() - unittest.registerResult(result) - - for ref in unittest.signals._results: - if ref is result: - break - elif ref is not result: - self.fail("odd object in result set") - else: - self.fail("result not found") - - - def testInterruptCaught(self): - default_handler = signal.getsignal(signal.SIGINT) - - result = unittest.TestResult() - unittest.installHandler() - unittest.registerResult(result) - - self.assertNotEqual(signal.getsignal(signal.SIGINT), default_handler) - - def test(result): - pid = os.getpid() - os.kill(pid, signal.SIGINT) - result.breakCaught = True - self.assertTrue(result.shouldStop) - - try: - test(result) - except KeyboardInterrupt: - self.fail("KeyboardInterrupt not handled") - self.assertTrue(result.breakCaught) - - - def testSecondInterrupt(self): - # Can't use 
skipIf decorator because the signal handler may have - # been changed after defining this method. - if signal.getsignal(signal.SIGINT) == signal.SIG_IGN: - self.skipTest("test requires SIGINT to not be ignored") - result = unittest.TestResult() - unittest.installHandler() - unittest.registerResult(result) - - def test(result): - pid = os.getpid() - os.kill(pid, signal.SIGINT) - result.breakCaught = True - self.assertTrue(result.shouldStop) - os.kill(pid, signal.SIGINT) - self.fail("Second KeyboardInterrupt not raised") - - try: - test(result) - except KeyboardInterrupt: - pass - else: - self.fail("Second KeyboardInterrupt not raised") - self.assertTrue(result.breakCaught) - - - def testTwoResults(self): - unittest.installHandler() - - result = unittest.TestResult() - unittest.registerResult(result) - new_handler = signal.getsignal(signal.SIGINT) - - result2 = unittest.TestResult() - unittest.registerResult(result2) - self.assertEqual(signal.getsignal(signal.SIGINT), new_handler) - - result3 = unittest.TestResult() - - def test(result): - pid = os.getpid() - os.kill(pid, signal.SIGINT) - - try: - test(result) - except KeyboardInterrupt: - self.fail("KeyboardInterrupt not handled") - - self.assertTrue(result.shouldStop) - self.assertTrue(result2.shouldStop) - self.assertFalse(result3.shouldStop) - - - def testHandlerReplacedButCalled(self): - # Can't use skipIf decorator because the signal handler may have - # been changed after defining this method. 
- if signal.getsignal(signal.SIGINT) == signal.SIG_IGN: - self.skipTest("test requires SIGINT to not be ignored") - # If our handler has been replaced (is no longer installed) but is - # called by the *new* handler, then it isn't safe to delay the - # SIGINT and we should immediately delegate to the default handler - unittest.installHandler() - - handler = signal.getsignal(signal.SIGINT) - def new_handler(frame, signum): - handler(frame, signum) - signal.signal(signal.SIGINT, new_handler) - - try: - pid = os.getpid() - os.kill(pid, signal.SIGINT) - except KeyboardInterrupt: - pass - else: - self.fail("replaced but delegated handler doesn't raise interrupt") - - def testRunner(self): - # Creating a TextTestRunner with the appropriate argument should - # register the TextTestResult it creates - runner = unittest.TextTestRunner(stream=StringIO()) - - result = runner.run(unittest.TestSuite()) - self.assertIn(result, unittest.signals._results) - - def testWeakReferences(self): - # Calling registerResult on a result should not keep it alive - result = unittest.TestResult() - unittest.registerResult(result) - - ref = weakref.ref(result) - del result - - # For non-reference counting implementations - gc.collect();gc.collect() - self.assertIsNone(ref()) - - - def testRemoveResult(self): - result = unittest.TestResult() - unittest.registerResult(result) - - unittest.installHandler() - self.assertTrue(unittest.removeResult(result)) - - # Should this raise an error instead? 
- self.assertFalse(unittest.removeResult(unittest.TestResult())) - - try: - pid = os.getpid() - os.kill(pid, signal.SIGINT) - except KeyboardInterrupt: - pass - - self.assertFalse(result.shouldStop) - - def testMainInstallsHandler(self): - failfast = object() - test = object() - verbosity = object() - result = object() - default_handler = signal.getsignal(signal.SIGINT) - - class FakeRunner(object): - initArgs = [] - runArgs = [] - def __init__(self, *args, **kwargs): - self.initArgs.append((args, kwargs)) - def run(self, test): - self.runArgs.append(test) - return result - - class Program(unittest.TestProgram): - def __init__(self, catchbreak): - self.exit = False - self.verbosity = verbosity - self.failfast = failfast - self.catchbreak = catchbreak - self.testRunner = FakeRunner - self.test = test - self.result = None - - p = Program(False) - p.runTests() - - self.assertEqual(FakeRunner.initArgs, [((), {'buffer': None, - 'verbosity': verbosity, - 'failfast': failfast})]) - self.assertEqual(FakeRunner.runArgs, [test]) - self.assertEqual(p.result, result) - - self.assertEqual(signal.getsignal(signal.SIGINT), default_handler) - - FakeRunner.initArgs = [] - FakeRunner.runArgs = [] - p = Program(True) - p.runTests() - - self.assertEqual(FakeRunner.initArgs, [((), {'buffer': None, - 'verbosity': verbosity, - 'failfast': failfast})]) - self.assertEqual(FakeRunner.runArgs, [test]) - self.assertEqual(p.result, result) - - self.assertNotEqual(signal.getsignal(signal.SIGINT), default_handler) - - def testRemoveHandler(self): - default_handler = signal.getsignal(signal.SIGINT) - unittest.installHandler() - unittest.removeHandler() - self.assertEqual(signal.getsignal(signal.SIGINT), default_handler) - - # check that calling removeHandler multiple times has no ill-effect - unittest.removeHandler() - self.assertEqual(signal.getsignal(signal.SIGINT), default_handler) - - def testRemoveHandlerAsDecorator(self): - default_handler = signal.getsignal(signal.SIGINT) - 
unittest.installHandler() - - @unittest.removeHandler - def test(): - self.assertEqual(signal.getsignal(signal.SIGINT), default_handler) - - test() - self.assertNotEqual(signal.getsignal(signal.SIGINT), default_handler) - -@unittest.skipUnless(hasattr(os, 'kill'), "Test requires os.kill") -@unittest.skipIf(sys.platform =="win32", "Test cannot run on Windows") -@unittest.skipIf(sys.platform == 'freebsd6', "Test kills regrtest on freebsd6 " - "if threads have been used") -class TestBreakDefaultIntHandler(TestBreak): - int_handler = signal.default_int_handler - -@unittest.skipUnless(hasattr(os, 'kill'), "Test requires os.kill") -@unittest.skipIf(sys.platform =="win32", "Test cannot run on Windows") -@unittest.skipIf(sys.platform == 'freebsd6', "Test kills regrtest on freebsd6 " - "if threads have been used") -class TestBreakSignalIgnored(TestBreak): - int_handler = signal.SIG_IGN - -@unittest.skipUnless(hasattr(os, 'kill'), "Test requires os.kill") -@unittest.skipIf(sys.platform =="win32", "Test cannot run on Windows") -@unittest.skipIf(sys.platform == 'freebsd6', "Test kills regrtest on freebsd6 " - "if threads have been used") -class TestBreakSignalDefault(TestBreak): - int_handler = signal.SIG_DFL diff --git a/python/Lib/unittest/test/test_case.py b/python/Lib/unittest/test/test_case.py deleted file mode 100644 index 7658189fe5..0000000000 --- a/python/Lib/unittest/test/test_case.py +++ /dev/null @@ -1,1171 +0,0 @@ -import difflib -import pprint -import pickle -import re -import sys - -from copy import deepcopy -from test import test_support - -import unittest - -from unittest.test.support import ( - TestEquality, TestHashing, LoggingResult, ResultWithNoStartTestRunStopTestRun -) - - -class Test(object): - "Keep these TestCase classes out of the main namespace" - - class Foo(unittest.TestCase): - def runTest(self): pass - def test1(self): pass - - class Bar(Foo): - def test2(self): pass - - class LoggingTestCase(unittest.TestCase): - """A test case which logs its 
calls.""" - - def __init__(self, events): - super(Test.LoggingTestCase, self).__init__('test') - self.events = events - - def setUp(self): - self.events.append('setUp') - - def test(self): - self.events.append('test') - - def tearDown(self): - self.events.append('tearDown') - - -class Test_TestCase(unittest.TestCase, TestEquality, TestHashing): - - ### Set up attributes used by inherited tests - ################################################################ - - # Used by TestHashing.test_hash and TestEquality.test_eq - eq_pairs = [(Test.Foo('test1'), Test.Foo('test1'))] - - # Used by TestEquality.test_ne - ne_pairs = [(Test.Foo('test1'), Test.Foo('runTest')) - ,(Test.Foo('test1'), Test.Bar('test1')) - ,(Test.Foo('test1'), Test.Bar('test2'))] - - ################################################################ - ### /Set up attributes used by inherited tests - - - # "class TestCase([methodName])" - # ... - # "Each instance of TestCase will run a single test method: the - # method named methodName." - # ... - # "methodName defaults to "runTest"." - # - # Make sure it really is optional, and that it defaults to the proper - # thing. - def test_init__no_test_name(self): - class Test(unittest.TestCase): - def runTest(self): raise TypeError() - def test(self): pass - - self.assertEqual(Test().id()[-13:], '.Test.runTest') - - # "class TestCase([methodName])" - # ... - # "Each instance of TestCase will run a single test method: the - # method named methodName." - def test_init__test_name__valid(self): - class Test(unittest.TestCase): - def runTest(self): raise TypeError() - def test(self): pass - - self.assertEqual(Test('test').id()[-10:], '.Test.test') - - # "class TestCase([methodName])" - # ... - # "Each instance of TestCase will run a single test method: the - # method named methodName." 
- def test_init__test_name__invalid(self): - class Test(unittest.TestCase): - def runTest(self): raise TypeError() - def test(self): pass - - try: - Test('testfoo') - except ValueError: - pass - else: - self.fail("Failed to raise ValueError") - - # "Return the number of tests represented by the this test object. For - # TestCase instances, this will always be 1" - def test_countTestCases(self): - class Foo(unittest.TestCase): - def test(self): pass - - self.assertEqual(Foo('test').countTestCases(), 1) - - # "Return the default type of test result object to be used to run this - # test. For TestCase instances, this will always be - # unittest.TestResult; subclasses of TestCase should - # override this as necessary." - def test_defaultTestResult(self): - class Foo(unittest.TestCase): - def runTest(self): - pass - - result = Foo().defaultTestResult() - self.assertEqual(type(result), unittest.TestResult) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if setUp() raises - # an exception. - def test_run_call_order__error_in_setUp(self): - events = [] - result = LoggingResult(events) - - class Foo(Test.LoggingTestCase): - def setUp(self): - super(Foo, self).setUp() - raise RuntimeError('raised by Foo.setUp') - - Foo(events).run(result) - expected = ['startTest', 'setUp', 'addError', 'stopTest'] - self.assertEqual(events, expected) - - # "With a temporary result stopTestRun is called when setUp errors. 
- def test_run_call_order__error_in_setUp_default_result(self): - events = [] - - class Foo(Test.LoggingTestCase): - def defaultTestResult(self): - return LoggingResult(self.events) - - def setUp(self): - super(Foo, self).setUp() - raise RuntimeError('raised by Foo.setUp') - - Foo(events).run() - expected = ['startTestRun', 'startTest', 'setUp', 'addError', - 'stopTest', 'stopTestRun'] - self.assertEqual(events, expected) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if the test raises - # an error (as opposed to a failure). - def test_run_call_order__error_in_test(self): - events = [] - result = LoggingResult(events) - - class Foo(Test.LoggingTestCase): - def test(self): - super(Foo, self).test() - raise RuntimeError('raised by Foo.test') - - expected = ['startTest', 'setUp', 'test', 'addError', 'tearDown', - 'stopTest'] - Foo(events).run(result) - self.assertEqual(events, expected) - - # "With a default result, an error in the test still results in stopTestRun - # being called." - def test_run_call_order__error_in_test_default_result(self): - events = [] - - class Foo(Test.LoggingTestCase): - def defaultTestResult(self): - return LoggingResult(self.events) - - def test(self): - super(Foo, self).test() - raise RuntimeError('raised by Foo.test') - - expected = ['startTestRun', 'startTest', 'setUp', 'test', 'addError', - 'tearDown', 'stopTest', 'stopTestRun'] - Foo(events).run() - self.assertEqual(events, expected) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. 
In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if the test signals - # a failure (as opposed to an error). - def test_run_call_order__failure_in_test(self): - events = [] - result = LoggingResult(events) - - class Foo(Test.LoggingTestCase): - def test(self): - super(Foo, self).test() - self.fail('raised by Foo.test') - - expected = ['startTest', 'setUp', 'test', 'addFailure', 'tearDown', - 'stopTest'] - Foo(events).run(result) - self.assertEqual(events, expected) - - # "When a test fails with a default result stopTestRun is still called." - def test_run_call_order__failure_in_test_default_result(self): - - class Foo(Test.LoggingTestCase): - def defaultTestResult(self): - return LoggingResult(self.events) - def test(self): - super(Foo, self).test() - self.fail('raised by Foo.test') - - expected = ['startTestRun', 'startTest', 'setUp', 'test', 'addFailure', - 'tearDown', 'stopTest', 'stopTestRun'] - events = [] - Foo(events).run() - self.assertEqual(events, expected) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if tearDown() raises - # an exception. - def test_run_call_order__error_in_tearDown(self): - events = [] - result = LoggingResult(events) - - class Foo(Test.LoggingTestCase): - def tearDown(self): - super(Foo, self).tearDown() - raise RuntimeError('raised by Foo.tearDown') - - Foo(events).run(result) - expected = ['startTest', 'setUp', 'test', 'tearDown', 'addError', - 'stopTest'] - self.assertEqual(events, expected) - - # "When tearDown errors with a default result stopTestRun is still called." 
- def test_run_call_order__error_in_tearDown_default_result(self): - - class Foo(Test.LoggingTestCase): - def defaultTestResult(self): - return LoggingResult(self.events) - def tearDown(self): - super(Foo, self).tearDown() - raise RuntimeError('raised by Foo.tearDown') - - events = [] - Foo(events).run() - expected = ['startTestRun', 'startTest', 'setUp', 'test', 'tearDown', - 'addError', 'stopTest', 'stopTestRun'] - self.assertEqual(events, expected) - - # "TestCase.run() still works when the defaultTestResult is a TestResult - # that does not support startTestRun and stopTestRun. - def test_run_call_order_default_result(self): - - class Foo(unittest.TestCase): - def defaultTestResult(self): - return ResultWithNoStartTestRunStopTestRun() - def test(self): - pass - - Foo('test').run() - - # "This class attribute gives the exception raised by the test() method. - # If a test framework needs to use a specialized exception, possibly to - # carry additional information, it must subclass this exception in - # order to ``play fair'' with the framework. The initial value of this - # attribute is AssertionError" - def test_failureException__default(self): - class Foo(unittest.TestCase): - def test(self): - pass - - self.assertIs(Foo('test').failureException, AssertionError) - - # "This class attribute gives the exception raised by the test() method. - # If a test framework needs to use a specialized exception, possibly to - # carry additional information, it must subclass this exception in - # order to ``play fair'' with the framework." 
- # - # Make sure TestCase.run() respects the designated failureException - def test_failureException__subclassing__explicit_raise(self): - events = [] - result = LoggingResult(events) - - class Foo(unittest.TestCase): - def test(self): - raise RuntimeError() - - failureException = RuntimeError - - self.assertIs(Foo('test').failureException, RuntimeError) - - - Foo('test').run(result) - expected = ['startTest', 'addFailure', 'stopTest'] - self.assertEqual(events, expected) - - # "This class attribute gives the exception raised by the test() method. - # If a test framework needs to use a specialized exception, possibly to - # carry additional information, it must subclass this exception in - # order to ``play fair'' with the framework." - # - # Make sure TestCase.run() respects the designated failureException - def test_failureException__subclassing__implicit_raise(self): - events = [] - result = LoggingResult(events) - - class Foo(unittest.TestCase): - def test(self): - self.fail("foo") - - failureException = RuntimeError - - self.assertIs(Foo('test').failureException, RuntimeError) - - - Foo('test').run(result) - expected = ['startTest', 'addFailure', 'stopTest'] - self.assertEqual(events, expected) - - # "The default implementation does nothing." - def test_setUp(self): - class Foo(unittest.TestCase): - def runTest(self): - pass - - # ... and nothing should happen - Foo().setUp() - - # "The default implementation does nothing." - def test_tearDown(self): - class Foo(unittest.TestCase): - def runTest(self): - pass - - # ... and nothing should happen - Foo().tearDown() - - # "Return a string identifying the specific test case." - # - # Because of the vague nature of the docs, I'm not going to lock this - # test down too much. 
Really all that can be asserted is that the id() - # will be a string (either 8-byte or unicode -- again, because the docs - # just say "string") - def test_id(self): - class Foo(unittest.TestCase): - def runTest(self): - pass - - self.assertIsInstance(Foo().id(), basestring) - - # "If result is omitted or None, a temporary result object is created - # and used, but is not made available to the caller. As TestCase owns the - # temporary result startTestRun and stopTestRun are called. - - def test_run__uses_defaultTestResult(self): - events = [] - - class Foo(unittest.TestCase): - def test(self): - events.append('test') - - def defaultTestResult(self): - return LoggingResult(events) - - # Make run() find a result object on its own - Foo('test').run() - - expected = ['startTestRun', 'startTest', 'test', 'addSuccess', - 'stopTest', 'stopTestRun'] - self.assertEqual(events, expected) - - def testShortDescriptionWithoutDocstring(self): - self.assertIsNone(self.shortDescription()) - - @unittest.skipIf(sys.flags.optimize >= 2, - "Docstrings are omitted with -O2 and above") - def testShortDescriptionWithOneLineDocstring(self): - """Tests shortDescription() for a method with a docstring.""" - self.assertEqual( - self.shortDescription(), - 'Tests shortDescription() for a method with a docstring.') - - @unittest.skipIf(sys.flags.optimize >= 2, - "Docstrings are omitted with -O2 and above") - def testShortDescriptionWithMultiLineDocstring(self): - """Tests shortDescription() for a method with a longer docstring. - - This method ensures that only the first line of a docstring is - returned used in the short description, no matter how long the - whole thing is. 
- """ - self.assertEqual( - self.shortDescription(), - 'Tests shortDescription() for a method with a longer ' - 'docstring.') - - def testAddTypeEqualityFunc(self): - class SadSnake(object): - """Dummy class for test_addTypeEqualityFunc.""" - s1, s2 = SadSnake(), SadSnake() - self.assertNotEqual(s1, s2) - def AllSnakesCreatedEqual(a, b, msg=None): - return type(a) is type(b) is SadSnake - self.addTypeEqualityFunc(SadSnake, AllSnakesCreatedEqual) - self.assertEqual(s1, s2) - # No this doesn't clean up and remove the SadSnake equality func - # from this TestCase instance but since its a local nothing else - # will ever notice that. - - def testAssertIs(self): - thing = object() - self.assertIs(thing, thing) - self.assertRaises(self.failureException, self.assertIs, thing, object()) - - def testAssertIsNot(self): - thing = object() - self.assertIsNot(thing, object()) - self.assertRaises(self.failureException, self.assertIsNot, thing, thing) - - def testAssertIsInstance(self): - thing = [] - self.assertIsInstance(thing, list) - self.assertRaises(self.failureException, self.assertIsInstance, - thing, dict) - - def testAssertNotIsInstance(self): - thing = [] - self.assertNotIsInstance(thing, dict) - self.assertRaises(self.failureException, self.assertNotIsInstance, - thing, list) - - def testAssertIn(self): - animals = {'monkey': 'banana', 'cow': 'grass', 'seal': 'fish'} - - self.assertIn('a', 'abc') - self.assertIn(2, [1, 2, 3]) - self.assertIn('monkey', animals) - - self.assertNotIn('d', 'abc') - self.assertNotIn(0, [1, 2, 3]) - self.assertNotIn('otter', animals) - - self.assertRaises(self.failureException, self.assertIn, 'x', 'abc') - self.assertRaises(self.failureException, self.assertIn, 4, [1, 2, 3]) - self.assertRaises(self.failureException, self.assertIn, 'elephant', - animals) - - self.assertRaises(self.failureException, self.assertNotIn, 'c', 'abc') - self.assertRaises(self.failureException, self.assertNotIn, 1, [1, 2, 3]) - 
self.assertRaises(self.failureException, self.assertNotIn, 'cow', - animals) - - def testAssertDictContainsSubset(self): - self.assertDictContainsSubset({}, {}) - self.assertDictContainsSubset({}, {'a': 1}) - self.assertDictContainsSubset({'a': 1}, {'a': 1}) - self.assertDictContainsSubset({'a': 1}, {'a': 1, 'b': 2}) - self.assertDictContainsSubset({'a': 1, 'b': 2}, {'a': 1, 'b': 2}) - - with self.assertRaises(self.failureException): - self.assertDictContainsSubset({1: "one"}, {}) - - with self.assertRaises(self.failureException): - self.assertDictContainsSubset({'a': 2}, {'a': 1}) - - with self.assertRaises(self.failureException): - self.assertDictContainsSubset({'c': 1}, {'a': 1}) - - with self.assertRaises(self.failureException): - self.assertDictContainsSubset({'a': 1, 'c': 1}, {'a': 1}) - - with self.assertRaises(self.failureException): - self.assertDictContainsSubset({'a': 1, 'c': 1}, {'a': 1}) - - with test_support.check_warnings(("", UnicodeWarning)): - one = ''.join(chr(i) for i in range(255)) - # this used to cause a UnicodeDecodeError constructing the failure msg - with self.assertRaises(self.failureException): - self.assertDictContainsSubset({'foo': one}, {'foo': u'\uFFFD'}) - - def testAssertEqual(self): - equal_pairs = [ - ((), ()), - ({}, {}), - ([], []), - (set(), set()), - (frozenset(), frozenset())] - for a, b in equal_pairs: - # This mess of try excepts is to test the assertEqual behavior - # itself. 
- try: - self.assertEqual(a, b) - except self.failureException: - self.fail('assertEqual(%r, %r) failed' % (a, b)) - try: - self.assertEqual(a, b, msg='foo') - except self.failureException: - self.fail('assertEqual(%r, %r) with msg= failed' % (a, b)) - try: - self.assertEqual(a, b, 'foo') - except self.failureException: - self.fail('assertEqual(%r, %r) with third parameter failed' % - (a, b)) - - unequal_pairs = [ - ((), []), - ({}, set()), - (set([4,1]), frozenset([4,2])), - (frozenset([4,5]), set([2,3])), - (set([3,4]), set([5,4]))] - for a, b in unequal_pairs: - self.assertRaises(self.failureException, self.assertEqual, a, b) - self.assertRaises(self.failureException, self.assertEqual, a, b, - 'foo') - self.assertRaises(self.failureException, self.assertEqual, a, b, - msg='foo') - - def testEquality(self): - self.assertListEqual([], []) - self.assertTupleEqual((), ()) - self.assertSequenceEqual([], ()) - - a = [0, 'a', []] - b = [] - self.assertRaises(unittest.TestCase.failureException, - self.assertListEqual, a, b) - self.assertRaises(unittest.TestCase.failureException, - self.assertListEqual, tuple(a), tuple(b)) - self.assertRaises(unittest.TestCase.failureException, - self.assertSequenceEqual, a, tuple(b)) - - b.extend(a) - self.assertListEqual(a, b) - self.assertTupleEqual(tuple(a), tuple(b)) - self.assertSequenceEqual(a, tuple(b)) - self.assertSequenceEqual(tuple(a), b) - - self.assertRaises(self.failureException, self.assertListEqual, - a, tuple(b)) - self.assertRaises(self.failureException, self.assertTupleEqual, - tuple(a), b) - self.assertRaises(self.failureException, self.assertListEqual, None, b) - self.assertRaises(self.failureException, self.assertTupleEqual, None, - tuple(b)) - self.assertRaises(self.failureException, self.assertSequenceEqual, - None, tuple(b)) - self.assertRaises(self.failureException, self.assertListEqual, 1, 1) - self.assertRaises(self.failureException, self.assertTupleEqual, 1, 1) - self.assertRaises(self.failureException, 
self.assertSequenceEqual, - 1, 1) - - self.assertDictEqual({}, {}) - - c = { 'x': 1 } - d = {} - self.assertRaises(unittest.TestCase.failureException, - self.assertDictEqual, c, d) - - d.update(c) - self.assertDictEqual(c, d) - - d['x'] = 0 - self.assertRaises(unittest.TestCase.failureException, - self.assertDictEqual, c, d, 'These are unequal') - - self.assertRaises(self.failureException, self.assertDictEqual, None, d) - self.assertRaises(self.failureException, self.assertDictEqual, [], d) - self.assertRaises(self.failureException, self.assertDictEqual, 1, 1) - - def testAssertSequenceEqualMaxDiff(self): - self.assertEqual(self.maxDiff, 80*8) - seq1 = 'a' + 'x' * 80**2 - seq2 = 'b' + 'x' * 80**2 - diff = '\n'.join(difflib.ndiff(pprint.pformat(seq1).splitlines(), - pprint.pformat(seq2).splitlines())) - # the +1 is the leading \n added by assertSequenceEqual - omitted = unittest.case.DIFF_OMITTED % (len(diff) + 1,) - - self.maxDiff = len(diff)//2 - try: - self.assertSequenceEqual(seq1, seq2) - except self.failureException as e: - msg = e.args[0] - else: - self.fail('assertSequenceEqual did not fail.') - self.assertLess(len(msg), len(diff)) - self.assertIn(omitted, msg) - - self.maxDiff = len(diff) * 2 - try: - self.assertSequenceEqual(seq1, seq2) - except self.failureException as e: - msg = e.args[0] - else: - self.fail('assertSequenceEqual did not fail.') - self.assertGreater(len(msg), len(diff)) - self.assertNotIn(omitted, msg) - - self.maxDiff = None - try: - self.assertSequenceEqual(seq1, seq2) - except self.failureException as e: - msg = e.args[0] - else: - self.fail('assertSequenceEqual did not fail.') - self.assertGreater(len(msg), len(diff)) - self.assertNotIn(omitted, msg) - - def testTruncateMessage(self): - self.maxDiff = 1 - message = self._truncateMessage('foo', 'bar') - omitted = unittest.case.DIFF_OMITTED % len('bar') - self.assertEqual(message, 'foo' + omitted) - - self.maxDiff = None - message = self._truncateMessage('foo', 'bar') - 
self.assertEqual(message, 'foobar') - - self.maxDiff = 4 - message = self._truncateMessage('foo', 'bar') - self.assertEqual(message, 'foobar') - - def testAssertDictEqualTruncates(self): - test = unittest.TestCase('assertEqual') - def truncate(msg, diff): - return 'foo' - test._truncateMessage = truncate - try: - test.assertDictEqual({}, {1: 0}) - except self.failureException as e: - self.assertEqual(str(e), 'foo') - else: - self.fail('assertDictEqual did not fail') - - def testAssertMultiLineEqualTruncates(self): - test = unittest.TestCase('assertEqual') - def truncate(msg, diff): - return 'foo' - test._truncateMessage = truncate - try: - test.assertMultiLineEqual('foo', 'bar') - except self.failureException as e: - self.assertEqual(str(e), 'foo') - else: - self.fail('assertMultiLineEqual did not fail') - - def testAssertEqual_diffThreshold(self): - # check threshold value - self.assertEqual(self._diffThreshold, 2**16) - # disable madDiff to get diff markers - self.maxDiff = None - - # set a lower threshold value and add a cleanup to restore it - old_threshold = self._diffThreshold - self._diffThreshold = 2**8 - self.addCleanup(lambda: setattr(self, '_diffThreshold', old_threshold)) - - # under the threshold: diff marker (^) in error message - s = u'x' * (2**7) - with self.assertRaises(self.failureException) as cm: - self.assertEqual(s + 'a', s + 'b') - self.assertIn('^', str(cm.exception)) - self.assertEqual(s + 'a', s + 'a') - - # over the threshold: diff not used and marker (^) not in error message - s = u'x' * (2**9) - # if the path that uses difflib is taken, _truncateMessage will be - # called -- replace it with explodingTruncation to verify that this - # doesn't happen - def explodingTruncation(message, diff): - raise SystemError('this should not be raised') - old_truncate = self._truncateMessage - self._truncateMessage = explodingTruncation - self.addCleanup(lambda: setattr(self, '_truncateMessage', old_truncate)) - - s1, s2 = s + 'a', s + 'b' - with 
self.assertRaises(self.failureException) as cm: - self.assertEqual(s1, s2) - self.assertNotIn('^', str(cm.exception)) - self.assertEqual(str(cm.exception), '%r != %r' % (s1, s2)) - self.assertEqual(s + 'a', s + 'a') - - def testAssertItemsEqual(self): - a = object() - self.assertItemsEqual([1, 2, 3], [3, 2, 1]) - self.assertItemsEqual(['foo', 'bar', 'baz'], ['bar', 'baz', 'foo']) - self.assertItemsEqual([a, a, 2, 2, 3], (a, 2, 3, a, 2)) - self.assertItemsEqual([1, "2", "a", "a"], ["a", "2", True, "a"]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [1, 2] + [3] * 100, [1] * 100 + [2, 3]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [1, "2", "a", "a"], ["a", "2", True, 1]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [10], [10, 11]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [10, 11], [10]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [10, 11, 10], [10, 11]) - - # Test that sequences of unhashable objects can be tested for sameness: - self.assertItemsEqual([[1, 2], [3, 4], 0], [False, [3, 4], [1, 2]]) - # Test that iterator of unhashable objects can be tested for sameness: - self.assertItemsEqual(iter([1, 2, [], 3, 4]), - iter([1, 2, [], 3, 4])) - - # hashable types, but not orderable - self.assertRaises(self.failureException, self.assertItemsEqual, - [], [divmod, 'x', 1, 5j, 2j, frozenset()]) - # comparing dicts - self.assertItemsEqual([{'a': 1}, {'b': 2}], [{'b': 2}, {'a': 1}]) - # comparing heterogenous non-hashable sequences - self.assertItemsEqual([1, 'x', divmod, []], [divmod, [], 'x', 1]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [], [divmod, [], 'x', 1, 5j, 2j, set()]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [[1]], [[2]]) - - # Same elements, but not same sequence length - self.assertRaises(self.failureException, self.assertItemsEqual, - [1, 1, 2], [2, 1]) - 
self.assertRaises(self.failureException, self.assertItemsEqual, - [1, 1, "2", "a", "a"], ["2", "2", True, "a"]) - self.assertRaises(self.failureException, self.assertItemsEqual, - [1, {'b': 2}, None, True], [{'b': 2}, True, None]) - - # Same elements which don't reliably compare, in - # different order, see issue 10242 - a = [{2,4}, {1,2}] - b = a[::-1] - self.assertItemsEqual(a, b) - - # test utility functions supporting assertItemsEqual() - - diffs = set(unittest.util._count_diff_all_purpose('aaabccd', 'abbbcce')) - expected = {(3,1,'a'), (1,3,'b'), (1,0,'d'), (0,1,'e')} - self.assertEqual(diffs, expected) - - diffs = unittest.util._count_diff_all_purpose([[]], []) - self.assertEqual(diffs, [(1, 0, [])]) - - diffs = set(unittest.util._count_diff_hashable('aaabccd', 'abbbcce')) - expected = {(3,1,'a'), (1,3,'b'), (1,0,'d'), (0,1,'e')} - self.assertEqual(diffs, expected) - - def testAssertSetEqual(self): - set1 = set() - set2 = set() - self.assertSetEqual(set1, set2) - - self.assertRaises(self.failureException, self.assertSetEqual, None, set2) - self.assertRaises(self.failureException, self.assertSetEqual, [], set2) - self.assertRaises(self.failureException, self.assertSetEqual, set1, None) - self.assertRaises(self.failureException, self.assertSetEqual, set1, []) - - set1 = set(['a']) - set2 = set() - self.assertRaises(self.failureException, self.assertSetEqual, set1, set2) - - set1 = set(['a']) - set2 = set(['a']) - self.assertSetEqual(set1, set2) - - set1 = set(['a']) - set2 = set(['a', 'b']) - self.assertRaises(self.failureException, self.assertSetEqual, set1, set2) - - set1 = set(['a']) - set2 = frozenset(['a', 'b']) - self.assertRaises(self.failureException, self.assertSetEqual, set1, set2) - - set1 = set(['a', 'b']) - set2 = frozenset(['a', 'b']) - self.assertSetEqual(set1, set2) - - set1 = set() - set2 = "foo" - self.assertRaises(self.failureException, self.assertSetEqual, set1, set2) - self.assertRaises(self.failureException, self.assertSetEqual, set2, 
set1) - - # make sure any string formatting is tuple-safe - set1 = set([(0, 1), (2, 3)]) - set2 = set([(4, 5)]) - self.assertRaises(self.failureException, self.assertSetEqual, set1, set2) - - def testInequality(self): - # Try ints - self.assertGreater(2, 1) - self.assertGreaterEqual(2, 1) - self.assertGreaterEqual(1, 1) - self.assertLess(1, 2) - self.assertLessEqual(1, 2) - self.assertLessEqual(1, 1) - self.assertRaises(self.failureException, self.assertGreater, 1, 2) - self.assertRaises(self.failureException, self.assertGreater, 1, 1) - self.assertRaises(self.failureException, self.assertGreaterEqual, 1, 2) - self.assertRaises(self.failureException, self.assertLess, 2, 1) - self.assertRaises(self.failureException, self.assertLess, 1, 1) - self.assertRaises(self.failureException, self.assertLessEqual, 2, 1) - - # Try Floats - self.assertGreater(1.1, 1.0) - self.assertGreaterEqual(1.1, 1.0) - self.assertGreaterEqual(1.0, 1.0) - self.assertLess(1.0, 1.1) - self.assertLessEqual(1.0, 1.1) - self.assertLessEqual(1.0, 1.0) - self.assertRaises(self.failureException, self.assertGreater, 1.0, 1.1) - self.assertRaises(self.failureException, self.assertGreater, 1.0, 1.0) - self.assertRaises(self.failureException, self.assertGreaterEqual, 1.0, 1.1) - self.assertRaises(self.failureException, self.assertLess, 1.1, 1.0) - self.assertRaises(self.failureException, self.assertLess, 1.0, 1.0) - self.assertRaises(self.failureException, self.assertLessEqual, 1.1, 1.0) - - # Try Strings - self.assertGreater('bug', 'ant') - self.assertGreaterEqual('bug', 'ant') - self.assertGreaterEqual('ant', 'ant') - self.assertLess('ant', 'bug') - self.assertLessEqual('ant', 'bug') - self.assertLessEqual('ant', 'ant') - self.assertRaises(self.failureException, self.assertGreater, 'ant', 'bug') - self.assertRaises(self.failureException, self.assertGreater, 'ant', 'ant') - self.assertRaises(self.failureException, self.assertGreaterEqual, 'ant', 'bug') - self.assertRaises(self.failureException, 
self.assertLess, 'bug', 'ant') - self.assertRaises(self.failureException, self.assertLess, 'ant', 'ant') - self.assertRaises(self.failureException, self.assertLessEqual, 'bug', 'ant') - - # Try Unicode - self.assertGreater(u'bug', u'ant') - self.assertGreaterEqual(u'bug', u'ant') - self.assertGreaterEqual(u'ant', u'ant') - self.assertLess(u'ant', u'bug') - self.assertLessEqual(u'ant', u'bug') - self.assertLessEqual(u'ant', u'ant') - self.assertRaises(self.failureException, self.assertGreater, u'ant', u'bug') - self.assertRaises(self.failureException, self.assertGreater, u'ant', u'ant') - self.assertRaises(self.failureException, self.assertGreaterEqual, u'ant', - u'bug') - self.assertRaises(self.failureException, self.assertLess, u'bug', u'ant') - self.assertRaises(self.failureException, self.assertLess, u'ant', u'ant') - self.assertRaises(self.failureException, self.assertLessEqual, u'bug', u'ant') - - # Try Mixed String/Unicode - self.assertGreater('bug', u'ant') - self.assertGreater(u'bug', 'ant') - self.assertGreaterEqual('bug', u'ant') - self.assertGreaterEqual(u'bug', 'ant') - self.assertGreaterEqual('ant', u'ant') - self.assertGreaterEqual(u'ant', 'ant') - self.assertLess('ant', u'bug') - self.assertLess(u'ant', 'bug') - self.assertLessEqual('ant', u'bug') - self.assertLessEqual(u'ant', 'bug') - self.assertLessEqual('ant', u'ant') - self.assertLessEqual(u'ant', 'ant') - self.assertRaises(self.failureException, self.assertGreater, 'ant', u'bug') - self.assertRaises(self.failureException, self.assertGreater, u'ant', 'bug') - self.assertRaises(self.failureException, self.assertGreater, 'ant', u'ant') - self.assertRaises(self.failureException, self.assertGreater, u'ant', 'ant') - self.assertRaises(self.failureException, self.assertGreaterEqual, 'ant', - u'bug') - self.assertRaises(self.failureException, self.assertGreaterEqual, u'ant', - 'bug') - self.assertRaises(self.failureException, self.assertLess, 'bug', u'ant') - self.assertRaises(self.failureException, 
self.assertLess, u'bug', 'ant') - self.assertRaises(self.failureException, self.assertLess, 'ant', u'ant') - self.assertRaises(self.failureException, self.assertLess, u'ant', 'ant') - self.assertRaises(self.failureException, self.assertLessEqual, 'bug', u'ant') - self.assertRaises(self.failureException, self.assertLessEqual, u'bug', 'ant') - - def testAssertMultiLineEqual(self): - sample_text = b"""\ -http://www.python.org/doc/2.3/lib/module-unittest.html -test case - A test case is the smallest unit of testing. [...] -""" - revised_sample_text = b"""\ -http://www.python.org/doc/2.4.1/lib/module-unittest.html -test case - A test case is the smallest unit of testing. [...] You may provide your - own implementation that does not subclass from TestCase, of course. -""" - sample_text_error = b"""\ -- http://www.python.org/doc/2.3/lib/module-unittest.html -? ^ -+ http://www.python.org/doc/2.4.1/lib/module-unittest.html -? ^^^ - test case -- A test case is the smallest unit of testing. [...] -+ A test case is the smallest unit of testing. [...] You may provide your -? +++++++++++++++++++++ -+ own implementation that does not subclass from TestCase, of course. -""" - self.maxDiff = None - for type_changer in (lambda x: x, lambda x: x.decode('utf8')): - try: - self.assertMultiLineEqual(type_changer(sample_text), - type_changer(revised_sample_text)) - except self.failureException, e: - # need to remove the first line of the error message - error = str(e).encode('utf8').split('\n', 1)[1] - - # assertMultiLineEqual is hooked up as the default for - # unicode strings - so we can't use it for this check - self.assertTrue(sample_text_error == error) - - def testAsertEqualSingleLine(self): - sample_text = u"laden swallows fly slowly" - revised_sample_text = u"unladen swallows fly quickly" - sample_text_error = """\ -- laden swallows fly slowly -? ^^^^ -+ unladen swallows fly quickly -? 
++ ^^^^^ -""" - try: - self.assertEqual(sample_text, revised_sample_text) - except self.failureException as e: - error = str(e).split('\n', 1)[1] - self.assertTrue(sample_text_error == error) - - def testAssertIsNone(self): - self.assertIsNone(None) - self.assertRaises(self.failureException, self.assertIsNone, False) - self.assertIsNotNone('DjZoPloGears on Rails') - self.assertRaises(self.failureException, self.assertIsNotNone, None) - - def testAssertRegexpMatches(self): - self.assertRegexpMatches('asdfabasdf', r'ab+') - self.assertRaises(self.failureException, self.assertRegexpMatches, - 'saaas', r'aaaa') - - def testAssertRaisesCallable(self): - class ExceptionMock(Exception): - pass - def Stub(): - raise ExceptionMock('We expect') - self.assertRaises(ExceptionMock, Stub) - # A tuple of exception classes is accepted - self.assertRaises((ValueError, ExceptionMock), Stub) - # *args and **kwargs also work - self.assertRaises(ValueError, int, '19', base=8) - # Failure when no exception is raised - with self.assertRaises(self.failureException): - self.assertRaises(ExceptionMock, lambda: 0) - # Failure when another exception is raised - with self.assertRaises(ExceptionMock): - self.assertRaises(ValueError, Stub) - - def testAssertRaisesContext(self): - class ExceptionMock(Exception): - pass - def Stub(): - raise ExceptionMock('We expect') - with self.assertRaises(ExceptionMock): - Stub() - # A tuple of exception classes is accepted - with self.assertRaises((ValueError, ExceptionMock)) as cm: - Stub() - # The context manager exposes caught exception - self.assertIsInstance(cm.exception, ExceptionMock) - self.assertEqual(cm.exception.args[0], 'We expect') - # *args and **kwargs also work - with self.assertRaises(ValueError): - int('19', base=8) - # Failure when no exception is raised - with self.assertRaises(self.failureException): - with self.assertRaises(ExceptionMock): - pass - # Failure when another exception is raised - with self.assertRaises(ExceptionMock): - 
self.assertRaises(ValueError, Stub) - - def testAssertRaisesRegexp(self): - class ExceptionMock(Exception): - pass - - def Stub(): - raise ExceptionMock('We expect') - - self.assertRaisesRegexp(ExceptionMock, re.compile('expect$'), Stub) - self.assertRaisesRegexp(ExceptionMock, 'expect$', Stub) - self.assertRaisesRegexp(ExceptionMock, u'expect$', Stub) - - def testAssertNotRaisesRegexp(self): - self.assertRaisesRegexp( - self.failureException, '^Exception not raised$', - self.assertRaisesRegexp, Exception, re.compile('x'), - lambda: None) - self.assertRaisesRegexp( - self.failureException, '^Exception not raised$', - self.assertRaisesRegexp, Exception, 'x', - lambda: None) - self.assertRaisesRegexp( - self.failureException, '^Exception not raised$', - self.assertRaisesRegexp, Exception, u'x', - lambda: None) - - def testAssertRaisesRegexpInvalidRegexp(self): - # Issue 20145. - class MyExc(Exception): - pass - self.assertRaises(TypeError, self.assertRaisesRegexp, MyExc, lambda: True) - - def testAssertRaisesRegexpMismatch(self): - def Stub(): - raise Exception('Unexpected') - - self.assertRaisesRegexp( - self.failureException, - r'"\^Expected\$" does not match "Unexpected"', - self.assertRaisesRegexp, Exception, '^Expected$', - Stub) - self.assertRaisesRegexp( - self.failureException, - r'"\^Expected\$" does not match "Unexpected"', - self.assertRaisesRegexp, Exception, u'^Expected$', - Stub) - self.assertRaisesRegexp( - self.failureException, - r'"\^Expected\$" does not match "Unexpected"', - self.assertRaisesRegexp, Exception, - re.compile('^Expected$'), Stub) - - def testAssertRaisesExcValue(self): - class ExceptionMock(Exception): - pass - - def Stub(foo): - raise ExceptionMock(foo) - v = "particular value" - - ctx = self.assertRaises(ExceptionMock) - with ctx: - Stub(v) - e = ctx.exception - self.assertIsInstance(e, ExceptionMock) - self.assertEqual(e.args[0], v) - - def testSynonymAssertMethodNames(self): - """Test undocumented method name synonyms. 
- - Please do not use these methods names in your own code. - - This test confirms their continued existence and functionality - in order to avoid breaking existing code. - """ - self.assertNotEquals(3, 5) - self.assertEquals(3, 3) - self.assertAlmostEquals(2.0, 2.0) - self.assertNotAlmostEquals(3.0, 5.0) - self.assert_(True) - - def testPendingDeprecationMethodNames(self): - """Test fail* methods pending deprecation, they will warn in 3.2. - - Do not use these methods. They will go away in 3.3. - """ - with test_support.check_warnings(): - self.failIfEqual(3, 5) - self.failUnlessEqual(3, 3) - self.failUnlessAlmostEqual(2.0, 2.0) - self.failIfAlmostEqual(3.0, 5.0) - self.failUnless(True) - self.failUnlessRaises(TypeError, lambda _: 3.14 + u'spam') - self.failIf(False) - - def testDeepcopy(self): - # Issue: 5660 - class TestableTest(unittest.TestCase): - def testNothing(self): - pass - - test = TestableTest('testNothing') - - # This shouldn't blow up - deepcopy(test) - - def testKeyboardInterrupt(self): - def _raise(self=None): - raise KeyboardInterrupt - def nothing(self): - pass - - class Test1(unittest.TestCase): - test_something = _raise - - class Test2(unittest.TestCase): - setUp = _raise - test_something = nothing - - class Test3(unittest.TestCase): - test_something = nothing - tearDown = _raise - - class Test4(unittest.TestCase): - def test_something(self): - self.addCleanup(_raise) - - for klass in (Test1, Test2, Test3, Test4): - with self.assertRaises(KeyboardInterrupt): - klass('test_something').run() - - def testSystemExit(self): - def _raise(self=None): - raise SystemExit - def nothing(self): - pass - - class Test1(unittest.TestCase): - test_something = _raise - - class Test2(unittest.TestCase): - setUp = _raise - test_something = nothing - - class Test3(unittest.TestCase): - test_something = nothing - tearDown = _raise - - class Test4(unittest.TestCase): - def test_something(self): - self.addCleanup(_raise) - - for klass in (Test1, Test2, Test3, Test4): 
- result = unittest.TestResult() - klass('test_something').run(result) - self.assertEqual(len(result.errors), 1) - self.assertEqual(result.testsRun, 1) - - def testPickle(self): - # Issue 10326 - - # Can't use TestCase classes defined in Test class as - # pickle does not work with inner classes - test = unittest.TestCase('run') - for protocol in range(pickle.HIGHEST_PROTOCOL + 1): - - # blew up prior to fix - pickled_test = pickle.dumps(test, protocol=protocol) - - unpickled_test = pickle.loads(pickled_test) - self.assertEqual(test, unpickled_test) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_discovery.py b/python/Lib/unittest/test/test_discovery.py deleted file mode 100644 index 8577f05192..0000000000 --- a/python/Lib/unittest/test/test_discovery.py +++ /dev/null @@ -1,399 +0,0 @@ -import os -import re -import sys - -import unittest -import unittest.test - - -class TestDiscovery(unittest.TestCase): - - # Heavily mocked tests so I can avoid hitting the filesystem - def test_get_name_from_path(self): - loader = unittest.TestLoader() - - loader._top_level_dir = '/foo' - name = loader._get_name_from_path('/foo/bar/baz.py') - self.assertEqual(name, 'bar.baz') - - if not __debug__: - # asserts are off - return - - with self.assertRaises(AssertionError): - loader._get_name_from_path('/bar/baz.py') - - def test_find_tests(self): - loader = unittest.TestLoader() - - original_listdir = os.listdir - def restore_listdir(): - os.listdir = original_listdir - original_isfile = os.path.isfile - def restore_isfile(): - os.path.isfile = original_isfile - original_isdir = os.path.isdir - def restore_isdir(): - os.path.isdir = original_isdir - - path_lists = [['test1.py', 'test2.py', 'not_a_test.py', 'test_dir', - 'test.foo', 'test-not-a-module.py', 'another_dir'], - ['test3.py', 'test4.py', ]] - os.listdir = lambda path: path_lists.pop(0) - self.addCleanup(restore_listdir) - - def isdir(path): - return path.endswith('dir') - 
os.path.isdir = isdir - self.addCleanup(restore_isdir) - - def isfile(path): - # another_dir is not a package and so shouldn't be recursed into - return not path.endswith('dir') and not 'another_dir' in path - os.path.isfile = isfile - self.addCleanup(restore_isfile) - - loader._get_module_from_name = lambda path: path + ' module' - loader.loadTestsFromModule = lambda module: module + ' tests' - - top_level = os.path.abspath('/foo') - loader._top_level_dir = top_level - suite = list(loader._find_tests(top_level, 'test*.py')) - - expected = [name + ' module tests' for name in - ('test1', 'test2')] - expected.extend([('test_dir.%s' % name) + ' module tests' for name in - ('test3', 'test4')]) - self.assertEqual(suite, expected) - - def test_find_tests_with_package(self): - loader = unittest.TestLoader() - - original_listdir = os.listdir - def restore_listdir(): - os.listdir = original_listdir - original_isfile = os.path.isfile - def restore_isfile(): - os.path.isfile = original_isfile - original_isdir = os.path.isdir - def restore_isdir(): - os.path.isdir = original_isdir - - directories = ['a_directory', 'test_directory', 'test_directory2'] - path_lists = [directories, [], [], []] - os.listdir = lambda path: path_lists.pop(0) - self.addCleanup(restore_listdir) - - os.path.isdir = lambda path: True - self.addCleanup(restore_isdir) - - os.path.isfile = lambda path: os.path.basename(path) not in directories - self.addCleanup(restore_isfile) - - class Module(object): - paths = [] - load_tests_args = [] - - def __init__(self, path): - self.path = path - self.paths.append(path) - if os.path.basename(path) == 'test_directory': - def load_tests(loader, tests, pattern): - self.load_tests_args.append((loader, tests, pattern)) - return 'load_tests' - self.load_tests = load_tests - - def __eq__(self, other): - return self.path == other.path - - # Silence py3k warning - __hash__ = None - - loader._get_module_from_name = lambda name: Module(name) - def loadTestsFromModule(module, 
use_load_tests): - if use_load_tests: - raise self.failureException('use_load_tests should be False for packages') - return module.path + ' module tests' - loader.loadTestsFromModule = loadTestsFromModule - - loader._top_level_dir = '/foo' - # this time no '.py' on the pattern so that it can match - # a test package - suite = list(loader._find_tests('/foo', 'test*')) - - # We should have loaded tests from the test_directory package by calling load_tests - # and directly from the test_directory2 package - self.assertEqual(suite, - ['load_tests', 'test_directory2' + ' module tests']) - self.assertEqual(Module.paths, ['test_directory', 'test_directory2']) - - # load_tests should have been called once with loader, tests and pattern - self.assertEqual(Module.load_tests_args, - [(loader, 'test_directory' + ' module tests', 'test*')]) - - def test_discover(self): - loader = unittest.TestLoader() - - original_isfile = os.path.isfile - original_isdir = os.path.isdir - def restore_isfile(): - os.path.isfile = original_isfile - - os.path.isfile = lambda path: False - self.addCleanup(restore_isfile) - - orig_sys_path = sys.path[:] - def restore_path(): - sys.path[:] = orig_sys_path - self.addCleanup(restore_path) - - full_path = os.path.abspath(os.path.normpath('/foo')) - with self.assertRaises(ImportError): - loader.discover('/foo/bar', top_level_dir='/foo') - - self.assertEqual(loader._top_level_dir, full_path) - self.assertIn(full_path, sys.path) - - os.path.isfile = lambda path: True - os.path.isdir = lambda path: True - - def restore_isdir(): - os.path.isdir = original_isdir - self.addCleanup(restore_isdir) - - _find_tests_args = [] - def _find_tests(start_dir, pattern): - _find_tests_args.append((start_dir, pattern)) - return ['tests'] - loader._find_tests = _find_tests - loader.suiteClass = str - - suite = loader.discover('/foo/bar/baz', 'pattern', '/foo/bar') - - top_level_dir = os.path.abspath('/foo/bar') - start_dir = os.path.abspath('/foo/bar/baz') - 
self.assertEqual(suite, "['tests']") - self.assertEqual(loader._top_level_dir, top_level_dir) - self.assertEqual(_find_tests_args, [(start_dir, 'pattern')]) - self.assertIn(top_level_dir, sys.path) - - def test_discover_with_modules_that_fail_to_import(self): - loader = unittest.TestLoader() - - listdir = os.listdir - os.listdir = lambda _: ['test_this_does_not_exist.py'] - isfile = os.path.isfile - os.path.isfile = lambda _: True - orig_sys_path = sys.path[:] - def restore(): - os.path.isfile = isfile - os.listdir = listdir - sys.path[:] = orig_sys_path - self.addCleanup(restore) - - suite = loader.discover('.') - self.assertIn(os.getcwd(), sys.path) - self.assertEqual(suite.countTestCases(), 1) - test = list(list(suite)[0])[0] # extract test from suite - - with self.assertRaises(ImportError): - test.test_this_does_not_exist() - - def test_command_line_handling_parseArgs(self): - # Haha - take that uninstantiable class - program = object.__new__(unittest.TestProgram) - - args = [] - def do_discovery(argv): - args.extend(argv) - program._do_discovery = do_discovery - program.parseArgs(['something', 'discover']) - self.assertEqual(args, []) - - program.parseArgs(['something', 'discover', 'foo', 'bar']) - self.assertEqual(args, ['foo', 'bar']) - - def test_command_line_handling_do_discovery_too_many_arguments(self): - class Stop(Exception): - pass - def usageExit(): - raise Stop - - program = object.__new__(unittest.TestProgram) - program.usageExit = usageExit - program.testLoader = None - - with self.assertRaises(Stop): - # too many args - program._do_discovery(['one', 'two', 'three', 'four']) - - - def test_command_line_handling_do_discovery_uses_default_loader(self): - program = object.__new__(unittest.TestProgram) - - class Loader(object): - args = [] - def discover(self, start_dir, pattern, top_level_dir): - self.args.append((start_dir, pattern, top_level_dir)) - return 'tests' - - program.testLoader = Loader() - program._do_discovery(['-v']) - 
self.assertEqual(Loader.args, [('.', 'test*.py', None)]) - - def test_command_line_handling_do_discovery_calls_loader(self): - program = object.__new__(unittest.TestProgram) - - class Loader(object): - args = [] - def discover(self, start_dir, pattern, top_level_dir): - self.args.append((start_dir, pattern, top_level_dir)) - return 'tests' - - program._do_discovery(['-v'], Loader=Loader) - self.assertEqual(program.verbosity, 2) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('.', 'test*.py', None)]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['--verbose'], Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('.', 'test*.py', None)]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery([], Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('.', 'test*.py', None)]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['fish'], Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('fish', 'test*.py', None)]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['fish', 'eggs'], Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('fish', 'eggs', None)]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['fish', 'eggs', 'ham'], Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('fish', 'eggs', 'ham')]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['-s', 'fish'], Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('fish', 'test*.py', None)]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['-t', 'fish'], 
Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('.', 'test*.py', 'fish')]) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['-p', 'fish'], Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('.', 'fish', None)]) - self.assertFalse(program.failfast) - self.assertFalse(program.catchbreak) - - Loader.args = [] - program = object.__new__(unittest.TestProgram) - program._do_discovery(['-p', 'eggs', '-s', 'fish', '-v', '-f', '-c'], - Loader=Loader) - self.assertEqual(program.test, 'tests') - self.assertEqual(Loader.args, [('fish', 'eggs', None)]) - self.assertEqual(program.verbosity, 2) - self.assertTrue(program.failfast) - self.assertTrue(program.catchbreak) - - def setup_module_clash(self): - class Module(object): - __file__ = 'bar/foo.py' - sys.modules['foo'] = Module - full_path = os.path.abspath('foo') - original_listdir = os.listdir - original_isfile = os.path.isfile - original_isdir = os.path.isdir - - def cleanup(): - os.listdir = original_listdir - os.path.isfile = original_isfile - os.path.isdir = original_isdir - del sys.modules['foo'] - if full_path in sys.path: - sys.path.remove(full_path) - self.addCleanup(cleanup) - - def listdir(_): - return ['foo.py'] - def isfile(_): - return True - def isdir(_): - return True - os.listdir = listdir - os.path.isfile = isfile - os.path.isdir = isdir - return full_path - - def test_detect_module_clash(self): - full_path = self.setup_module_clash() - loader = unittest.TestLoader() - - mod_dir = os.path.abspath('bar') - expected_dir = os.path.abspath('foo') - msg = re.escape(r"'foo' module incorrectly imported from %r. Expected %r. " - "Is this module globally installed?" 
% (mod_dir, expected_dir)) - self.assertRaisesRegexp( - ImportError, '^%s$' % msg, loader.discover, - start_dir='foo', pattern='foo.py' - ) - self.assertEqual(sys.path[0], full_path) - - def test_module_symlink_ok(self): - full_path = self.setup_module_clash() - - original_realpath = os.path.realpath - - mod_dir = os.path.abspath('bar') - expected_dir = os.path.abspath('foo') - - def cleanup(): - os.path.realpath = original_realpath - self.addCleanup(cleanup) - - def realpath(path): - if path == os.path.join(mod_dir, 'foo.py'): - return os.path.join(expected_dir, 'foo.py') - return path - os.path.realpath = realpath - loader = unittest.TestLoader() - loader.discover(start_dir='foo', pattern='foo.py') - - def test_discovery_from_dotted_path(self): - loader = unittest.TestLoader() - - tests = [self] - expectedPath = os.path.abspath(os.path.dirname(unittest.test.__file__)) - - self.wasRun = False - def _find_tests(start_dir, pattern): - self.wasRun = True - self.assertEqual(start_dir, expectedPath) - return tests - loader._find_tests = _find_tests - suite = loader.discover('unittest.test') - self.assertTrue(self.wasRun) - self.assertEqual(suite._tests, tests) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_functiontestcase.py b/python/Lib/unittest/test/test_functiontestcase.py deleted file mode 100644 index 3a7889c297..0000000000 --- a/python/Lib/unittest/test/test_functiontestcase.py +++ /dev/null @@ -1,148 +0,0 @@ -import unittest - -from unittest.test.support import LoggingResult - - -class Test_FunctionTestCase(unittest.TestCase): - - # "Return the number of tests represented by the this test object. For - # TestCase instances, this will always be 1" - def test_countTestCases(self): - test = unittest.FunctionTestCase(lambda: None) - - self.assertEqual(test.countTestCases(), 1) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. 
Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if setUp() raises - # an exception. - def test_run_call_order__error_in_setUp(self): - events = [] - result = LoggingResult(events) - - def setUp(): - events.append('setUp') - raise RuntimeError('raised by setUp') - - def test(): - events.append('test') - - def tearDown(): - events.append('tearDown') - - expected = ['startTest', 'setUp', 'addError', 'stopTest'] - unittest.FunctionTestCase(test, setUp, tearDown).run(result) - self.assertEqual(events, expected) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if the test raises - # an error (as opposed to a failure). - def test_run_call_order__error_in_test(self): - events = [] - result = LoggingResult(events) - - def setUp(): - events.append('setUp') - - def test(): - events.append('test') - raise RuntimeError('raised by test') - - def tearDown(): - events.append('tearDown') - - expected = ['startTest', 'setUp', 'test', 'addError', 'tearDown', - 'stopTest'] - unittest.FunctionTestCase(test, setUp, tearDown).run(result) - self.assertEqual(events, expected) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if the test signals - # a failure (as opposed to an error). 
- def test_run_call_order__failure_in_test(self): - events = [] - result = LoggingResult(events) - - def setUp(): - events.append('setUp') - - def test(): - events.append('test') - self.fail('raised by test') - - def tearDown(): - events.append('tearDown') - - expected = ['startTest', 'setUp', 'test', 'addFailure', 'tearDown', - 'stopTest'] - unittest.FunctionTestCase(test, setUp, tearDown).run(result) - self.assertEqual(events, expected) - - # "When a setUp() method is defined, the test runner will run that method - # prior to each test. Likewise, if a tearDown() method is defined, the - # test runner will invoke that method after each test. In the example, - # setUp() was used to create a fresh sequence for each test." - # - # Make sure the proper call order is maintained, even if tearDown() raises - # an exception. - def test_run_call_order__error_in_tearDown(self): - events = [] - result = LoggingResult(events) - - def setUp(): - events.append('setUp') - - def test(): - events.append('test') - - def tearDown(): - events.append('tearDown') - raise RuntimeError('raised by tearDown') - - expected = ['startTest', 'setUp', 'test', 'tearDown', 'addError', - 'stopTest'] - unittest.FunctionTestCase(test, setUp, tearDown).run(result) - self.assertEqual(events, expected) - - # "Return a string identifying the specific test case." - # - # Because of the vague nature of the docs, I'm not going to lock this - # test down too much. Really all that can be asserted is that the id() - # will be a string (either 8-byte or unicode -- again, because the docs - # just say "string") - def test_id(self): - test = unittest.FunctionTestCase(lambda: None) - - self.assertIsInstance(test.id(), basestring) - - # "Returns a one-line description of the test, or None if no description - # has been provided. The default implementation of this method returns - # the first line of the test method's docstring, if available, or None." 
- def test_shortDescription__no_docstring(self): - test = unittest.FunctionTestCase(lambda: None) - - self.assertEqual(test.shortDescription(), None) - - # "Returns a one-line description of the test, or None if no description - # has been provided. The default implementation of this method returns - # the first line of the test method's docstring, if available, or None." - def test_shortDescription__singleline_docstring(self): - desc = "this tests foo" - test = unittest.FunctionTestCase(lambda: None, description=desc) - - self.assertEqual(test.shortDescription(), "this tests foo") - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_loader.py b/python/Lib/unittest/test/test_loader.py deleted file mode 100644 index 68e871c698..0000000000 --- a/python/Lib/unittest/test/test_loader.py +++ /dev/null @@ -1,1301 +0,0 @@ -import sys -import types - - -import unittest - - -class Test_TestLoader(unittest.TestCase): - - ### Tests for TestLoader.loadTestsFromTestCase - ################################################################ - - # "Return a suite of all tests cases contained in the TestCase-derived - # class testCaseClass" - def test_loadTestsFromTestCase(self): - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - - tests = unittest.TestSuite([Foo('test_1'), Foo('test_2')]) - - loader = unittest.TestLoader() - self.assertEqual(loader.loadTestsFromTestCase(Foo), tests) - - # "Return a suite of all tests cases contained in the TestCase-derived - # class testCaseClass" - # - # Make sure it does the right thing even if no tests were found - def test_loadTestsFromTestCase__no_matches(self): - class Foo(unittest.TestCase): - def foo_bar(self): pass - - empty_suite = unittest.TestSuite() - - loader = unittest.TestLoader() - self.assertEqual(loader.loadTestsFromTestCase(Foo), empty_suite) - - # "Return a suite of all tests cases contained in the TestCase-derived - # class 
testCaseClass" - # - # What happens if loadTestsFromTestCase() is given an object - # that isn't a subclass of TestCase? Specifically, what happens - # if testCaseClass is a subclass of TestSuite? - # - # This is checked for specifically in the code, so we better add a - # test for it. - def test_loadTestsFromTestCase__TestSuite_subclass(self): - class NotATestCase(unittest.TestSuite): - pass - - loader = unittest.TestLoader() - try: - loader.loadTestsFromTestCase(NotATestCase) - except TypeError: - pass - else: - self.fail('Should raise TypeError') - - # "Return a suite of all tests cases contained in the TestCase-derived - # class testCaseClass" - # - # Make sure loadTestsFromTestCase() picks up the default test method - # name (as specified by TestCase), even though the method name does - # not match the default TestLoader.testMethodPrefix string - def test_loadTestsFromTestCase__default_method_name(self): - class Foo(unittest.TestCase): - def runTest(self): - pass - - loader = unittest.TestLoader() - # This has to be false for the test to succeed - self.assertFalse('runTest'.startswith(loader.testMethodPrefix)) - - suite = loader.loadTestsFromTestCase(Foo) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), [Foo('runTest')]) - - ################################################################ - ### /Tests for TestLoader.loadTestsFromTestCase - - ### Tests for TestLoader.loadTestsFromModule - ################################################################ - - # "This method searches `module` for classes derived from TestCase" - def test_loadTestsFromModule__TestCase_subclass(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromModule(m) - self.assertIsInstance(suite, loader.suiteClass) - - expected = [loader.suiteClass([MyTestCase('test')])] - self.assertEqual(list(suite), expected) - - # "This 
method searches `module` for classes derived from TestCase" - # - # What happens if no tests are found (no TestCase instances)? - def test_loadTestsFromModule__no_TestCase_instances(self): - m = types.ModuleType('m') - - loader = unittest.TestLoader() - suite = loader.loadTestsFromModule(m) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), []) - - # "This method searches `module` for classes derived from TestCase" - # - # What happens if no tests are found (TestCases instances, but no tests)? - def test_loadTestsFromModule__no_TestCase_tests(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromModule(m) - self.assertIsInstance(suite, loader.suiteClass) - - self.assertEqual(list(suite), [loader.suiteClass()]) - - # "This method searches `module` for classes derived from TestCase"s - # - # What happens if loadTestsFromModule() is given something other - # than a module? - # - # XXX Currently, it succeeds anyway. This flexibility - # should either be documented or loadTestsFromModule() should - # raise a TypeError - # - # XXX Certain people are using this behaviour. We'll add a test for it - def test_loadTestsFromModule__not_a_module(self): - class MyTestCase(unittest.TestCase): - def test(self): - pass - - class NotAModule(object): - test_2 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromModule(NotAModule) - - reference = [unittest.TestSuite([MyTestCase('test')])] - self.assertEqual(list(suite), reference) - - - # Check that loadTestsFromModule honors (or not) a module - # with a load_tests function. 
- def test_loadTestsFromModule__load_tests(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - load_tests_args = [] - def load_tests(loader, tests, pattern): - self.assertIsInstance(tests, unittest.TestSuite) - load_tests_args.extend((loader, tests, pattern)) - return tests - m.load_tests = load_tests - - loader = unittest.TestLoader() - suite = loader.loadTestsFromModule(m) - self.assertIsInstance(suite, unittest.TestSuite) - self.assertEqual(load_tests_args, [loader, suite, None]) - - load_tests_args = [] - suite = loader.loadTestsFromModule(m, use_load_tests=False) - self.assertEqual(load_tests_args, []) - - def test_loadTestsFromModule__faulty_load_tests(self): - m = types.ModuleType('m') - - def load_tests(loader, tests, pattern): - raise TypeError('some failure') - m.load_tests = load_tests - - loader = unittest.TestLoader() - suite = loader.loadTestsFromModule(m) - self.assertIsInstance(suite, unittest.TestSuite) - self.assertEqual(suite.countTestCases(), 1) - test = list(suite)[0] - - self.assertRaisesRegexp(TypeError, "some failure", test.m) - - ################################################################ - ### /Tests for TestLoader.loadTestsFromModule() - - ### Tests for TestLoader.loadTestsFromName() - ################################################################ - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # Is ValueError raised in response to an empty name? 
- def test_loadTestsFromName__empty_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromName('') - except ValueError, e: - self.assertEqual(str(e), "Empty module name") - else: - self.fail("TestLoader.loadTestsFromName failed to raise ValueError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # What happens when the name contains invalid characters? - def test_loadTestsFromName__malformed_name(self): - loader = unittest.TestLoader() - - # XXX Should this raise ValueError or ImportError? - try: - loader.loadTestsFromName('abc () //') - except ValueError: - pass - except ImportError: - pass - else: - self.fail("TestLoader.loadTestsFromName failed to raise ValueError") - - # "The specifier name is a ``dotted name'' that may resolve ... to a - # module" - # - # What happens when a module by that name can't be found? - def test_loadTestsFromName__unknown_module_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromName('sdasfasfasdf') - except ImportError, e: - self.assertEqual(str(e), "No module named sdasfasfasdf") - else: - self.fail("TestLoader.loadTestsFromName failed to raise ImportError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # What happens when the module is found, but the attribute can't? 
- def test_loadTestsFromName__unknown_attr_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromName('unittest.sdasfasfasdf') - except AttributeError, e: - self.assertEqual(str(e), "'module' object has no attribute 'sdasfasfasdf'") - else: - self.fail("TestLoader.loadTestsFromName failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # What happens when we provide the module, but the attribute can't be - # found? - def test_loadTestsFromName__relative_unknown_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromName('sdasfasfasdf', unittest) - except AttributeError, e: - self.assertEqual(str(e), "'module' object has no attribute 'sdasfasfasdf'") - else: - self.fail("TestLoader.loadTestsFromName failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # ... - # "The method optionally resolves name relative to the given module" - # - # Does loadTestsFromName raise ValueError when passed an empty - # name relative to a provided module? - # - # XXX Should probably raise a ValueError instead of an AttributeError - def test_loadTestsFromName__relative_empty_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromName('', unittest) - except AttributeError: - pass - else: - self.fail("Failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." 
- # ... - # "The method optionally resolves name relative to the given module" - # - # What happens when an impossible name is given, relative to the provided - # `module`? - def test_loadTestsFromName__relative_malformed_name(self): - loader = unittest.TestLoader() - - # XXX Should this raise AttributeError or ValueError? - try: - loader.loadTestsFromName('abc () //', unittest) - except ValueError: - pass - except AttributeError: - pass - else: - self.fail("TestLoader.loadTestsFromName failed to raise ValueError") - - # "The method optionally resolves name relative to the given module" - # - # Does loadTestsFromName raise TypeError when the `module` argument - # isn't a module object? - # - # XXX Accepts the not-a-module object, ignoring the object's type - # This should raise an exception or the method name should be changed - # - # XXX Some people are relying on this, so keep it for now - def test_loadTestsFromName__relative_not_a_module(self): - class MyTestCase(unittest.TestCase): - def test(self): - pass - - class NotAModule(object): - test_2 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromName('test_2', NotAModule) - - reference = [MyTestCase('test')] - self.assertEqual(list(suite), reference) - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # Does it raise an exception if the name resolves to an invalid - # object? - def test_loadTestsFromName__relative_bad_object(self): - m = types.ModuleType('m') - m.testcase_1 = object() - - loader = unittest.TestLoader() - try: - loader.loadTestsFromName('testcase_1', m) - except TypeError: - pass - else: - self.fail("Should have raised TypeError") - - # "The specifier name is a ``dotted name'' that may - # resolve either to ... 
a test case class" - def test_loadTestsFromName__relative_TestCase_subclass(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromName('testcase_1', m) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), [MyTestCase('test')]) - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - def test_loadTestsFromName__relative_TestSuite(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testsuite = unittest.TestSuite([MyTestCase('test')]) - - loader = unittest.TestLoader() - suite = loader.loadTestsFromName('testsuite', m) - self.assertIsInstance(suite, loader.suiteClass) - - self.assertEqual(list(suite), [MyTestCase('test')]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a test method within a test case class" - def test_loadTestsFromName__relative_testmethod(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromName('testcase_1.test', m) - self.assertIsInstance(suite, loader.suiteClass) - - self.assertEqual(list(suite), [MyTestCase('test')]) - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # Does loadTestsFromName() raise the proper exception when trying to - # resolve "a test method within a test case class" that doesn't exist - # for the given name (relative to a provided module)? 
- def test_loadTestsFromName__relative_invalid_testmethod(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - try: - loader.loadTestsFromName('testcase_1.testfoo', m) - except AttributeError, e: - self.assertEqual(str(e), "type object 'MyTestCase' has no attribute 'testfoo'") - else: - self.fail("Failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a ... TestSuite instance" - def test_loadTestsFromName__callable__TestSuite(self): - m = types.ModuleType('m') - testcase_1 = unittest.FunctionTestCase(lambda: None) - testcase_2 = unittest.FunctionTestCase(lambda: None) - def return_TestSuite(): - return unittest.TestSuite([testcase_1, testcase_2]) - m.return_TestSuite = return_TestSuite - - loader = unittest.TestLoader() - suite = loader.loadTestsFromName('return_TestSuite', m) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), [testcase_1, testcase_2]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a TestCase ... instance" - def test_loadTestsFromName__callable__TestCase_instance(self): - m = types.ModuleType('m') - testcase_1 = unittest.FunctionTestCase(lambda: None) - def return_TestCase(): - return testcase_1 - m.return_TestCase = return_TestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromName('return_TestCase', m) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), [testcase_1]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a TestCase ... 
instance" - #***************************************************************** - #Override the suiteClass attribute to ensure that the suiteClass - #attribute is used - def test_loadTestsFromName__callable__TestCase_instance_ProperSuiteClass(self): - class SubTestSuite(unittest.TestSuite): - pass - m = types.ModuleType('m') - testcase_1 = unittest.FunctionTestCase(lambda: None) - def return_TestCase(): - return testcase_1 - m.return_TestCase = return_TestCase - - loader = unittest.TestLoader() - loader.suiteClass = SubTestSuite - suite = loader.loadTestsFromName('return_TestCase', m) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), [testcase_1]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a test method within a test case class" - #***************************************************************** - #Override the suiteClass attribute to ensure that the suiteClass - #attribute is used - def test_loadTestsFromName__relative_testmethod_ProperSuiteClass(self): - class SubTestSuite(unittest.TestSuite): - pass - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - loader.suiteClass=SubTestSuite - suite = loader.loadTestsFromName('testcase_1.test', m) - self.assertIsInstance(suite, loader.suiteClass) - - self.assertEqual(list(suite), [MyTestCase('test')]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a TestCase or TestSuite instance" - # - # What happens if the callable returns something else? 
- def test_loadTestsFromName__callable__wrong_type(self): - m = types.ModuleType('m') - def return_wrong(): - return 6 - m.return_wrong = return_wrong - - loader = unittest.TestLoader() - try: - loader.loadTestsFromName('return_wrong', m) - except TypeError: - pass - else: - self.fail("TestLoader.loadTestsFromName failed to raise TypeError") - - # "The specifier can refer to modules and packages which have not been - # imported; they will be imported as a side-effect" - def test_loadTestsFromName__module_not_loaded(self): - # We're going to try to load this module as a side-effect, so it - # better not be loaded before we try. - # - module_name = 'unittest.test.dummy' - sys.modules.pop(module_name, None) - - loader = unittest.TestLoader() - try: - suite = loader.loadTestsFromName(module_name) - - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), []) - - # module should now be loaded, thanks to loadTestsFromName() - self.assertIn(module_name, sys.modules) - finally: - if module_name in sys.modules: - del sys.modules[module_name] - - ################################################################ - ### Tests for TestLoader.loadTestsFromName() - - ### Tests for TestLoader.loadTestsFromNames() - ################################################################ - - # "Similar to loadTestsFromName(), but takes a sequence of names rather - # than a single name." - # - # What happens if that sequence of names is empty? - def test_loadTestsFromNames__empty_name_list(self): - loader = unittest.TestLoader() - - suite = loader.loadTestsFromNames([]) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), []) - - # "Similar to loadTestsFromName(), but takes a sequence of names rather - # than a single name." - # ... - # "The method optionally resolves name relative to the given module" - # - # What happens if that sequence of names is empty? - # - # XXX Should this raise a ValueError or just return an empty TestSuite? 
- def test_loadTestsFromNames__relative_empty_name_list(self): - loader = unittest.TestLoader() - - suite = loader.loadTestsFromNames([], unittest) - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), []) - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # Is ValueError raised in response to an empty name? - def test_loadTestsFromNames__empty_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromNames(['']) - except ValueError, e: - self.assertEqual(str(e), "Empty module name") - else: - self.fail("TestLoader.loadTestsFromNames failed to raise ValueError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # What happens when presented with an impossible module name? - def test_loadTestsFromNames__malformed_name(self): - loader = unittest.TestLoader() - - # XXX Should this raise ValueError or ImportError? - try: - loader.loadTestsFromNames(['abc () //']) - except ValueError: - pass - except ImportError: - pass - else: - self.fail("TestLoader.loadTestsFromNames failed to raise ValueError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # What happens when no module can be found for the given name? 
- def test_loadTestsFromNames__unknown_module_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromNames(['sdasfasfasdf']) - except ImportError, e: - self.assertEqual(str(e), "No module named sdasfasfasdf") - else: - self.fail("TestLoader.loadTestsFromNames failed to raise ImportError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # What happens when the module can be found, but not the attribute? - def test_loadTestsFromNames__unknown_attr_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromNames(['unittest.sdasfasfasdf', 'unittest']) - except AttributeError, e: - self.assertEqual(str(e), "'module' object has no attribute 'sdasfasfasdf'") - else: - self.fail("TestLoader.loadTestsFromNames failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # ... - # "The method optionally resolves name relative to the given module" - # - # What happens when given an unknown attribute on a specified `module` - # argument? - def test_loadTestsFromNames__unknown_name_relative_1(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromNames(['sdasfasfasdf'], unittest) - except AttributeError, e: - self.assertEqual(str(e), "'module' object has no attribute 'sdasfasfasdf'") - else: - self.fail("TestLoader.loadTestsFromName failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." 
- # ... - # "The method optionally resolves name relative to the given module" - # - # Do unknown attributes (relative to a provided module) still raise an - # exception even in the presence of valid attribute names? - def test_loadTestsFromNames__unknown_name_relative_2(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromNames(['TestCase', 'sdasfasfasdf'], unittest) - except AttributeError, e: - self.assertEqual(str(e), "'module' object has no attribute 'sdasfasfasdf'") - else: - self.fail("TestLoader.loadTestsFromName failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # ... - # "The method optionally resolves name relative to the given module" - # - # What happens when faced with the empty string? - # - # XXX This currently raises AttributeError, though ValueError is probably - # more appropriate - def test_loadTestsFromNames__relative_empty_name(self): - loader = unittest.TestLoader() - - try: - loader.loadTestsFromNames([''], unittest) - except AttributeError: - pass - else: - self.fail("Failed to raise ValueError") - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # ... - # "The method optionally resolves name relative to the given module" - # - # What happens when presented with an impossible attribute name? - def test_loadTestsFromNames__relative_malformed_name(self): - loader = unittest.TestLoader() - - # XXX Should this raise AttributeError or ValueError? 
- try: - loader.loadTestsFromNames(['abc () //'], unittest) - except AttributeError: - pass - except ValueError: - pass - else: - self.fail("TestLoader.loadTestsFromNames failed to raise ValueError") - - # "The method optionally resolves name relative to the given module" - # - # Does loadTestsFromNames() make sure the provided `module` is in fact - # a module? - # - # XXX This validation is currently not done. This flexibility should - # either be documented or a TypeError should be raised. - def test_loadTestsFromNames__relative_not_a_module(self): - class MyTestCase(unittest.TestCase): - def test(self): - pass - - class NotAModule(object): - test_2 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['test_2'], NotAModule) - - reference = [unittest.TestSuite([MyTestCase('test')])] - self.assertEqual(list(suite), reference) - - # "The specifier name is a ``dotted name'' that may resolve either to - # a module, a test case class, a TestSuite instance, a test method - # within a test case class, or a callable object which returns a - # TestCase or TestSuite instance." - # - # Does it raise an exception if the name resolves to an invalid - # object? - def test_loadTestsFromNames__relative_bad_object(self): - m = types.ModuleType('m') - m.testcase_1 = object() - - loader = unittest.TestLoader() - try: - loader.loadTestsFromNames(['testcase_1'], m) - except TypeError: - pass - else: - self.fail("Should have raised TypeError") - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... 
a test case class" - def test_loadTestsFromNames__relative_TestCase_subclass(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['testcase_1'], m) - self.assertIsInstance(suite, loader.suiteClass) - - expected = loader.suiteClass([MyTestCase('test')]) - self.assertEqual(list(suite), [expected]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a TestSuite instance" - def test_loadTestsFromNames__relative_TestSuite(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testsuite = unittest.TestSuite([MyTestCase('test')]) - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['testsuite'], m) - self.assertIsInstance(suite, loader.suiteClass) - - self.assertEqual(list(suite), [m.testsuite]) - - # "The specifier name is a ``dotted name'' that may resolve ... to ... a - # test method within a test case class" - def test_loadTestsFromNames__relative_testmethod(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['testcase_1.test'], m) - self.assertIsInstance(suite, loader.suiteClass) - - ref_suite = unittest.TestSuite([MyTestCase('test')]) - self.assertEqual(list(suite), [ref_suite]) - - # "The specifier name is a ``dotted name'' that may resolve ... to ... a - # test method within a test case class" - # - # Does the method gracefully handle names that initially look like they - # resolve to "a test method within a test case class" but don't? 
- def test_loadTestsFromNames__relative_invalid_testmethod(self): - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - def test(self): - pass - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - try: - loader.loadTestsFromNames(['testcase_1.testfoo'], m) - except AttributeError, e: - self.assertEqual(str(e), "type object 'MyTestCase' has no attribute 'testfoo'") - else: - self.fail("Failed to raise AttributeError") - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a ... TestSuite instance" - def test_loadTestsFromNames__callable__TestSuite(self): - m = types.ModuleType('m') - testcase_1 = unittest.FunctionTestCase(lambda: None) - testcase_2 = unittest.FunctionTestCase(lambda: None) - def return_TestSuite(): - return unittest.TestSuite([testcase_1, testcase_2]) - m.return_TestSuite = return_TestSuite - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['return_TestSuite'], m) - self.assertIsInstance(suite, loader.suiteClass) - - expected = unittest.TestSuite([testcase_1, testcase_2]) - self.assertEqual(list(suite), [expected]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a TestCase ... instance" - def test_loadTestsFromNames__callable__TestCase_instance(self): - m = types.ModuleType('m') - testcase_1 = unittest.FunctionTestCase(lambda: None) - def return_TestCase(): - return testcase_1 - m.return_TestCase = return_TestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['return_TestCase'], m) - self.assertIsInstance(suite, loader.suiteClass) - - ref_suite = unittest.TestSuite([testcase_1]) - self.assertEqual(list(suite), [ref_suite]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a TestCase or TestSuite instance" - # - # Are staticmethods handled correctly? 
- def test_loadTestsFromNames__callable__call_staticmethod(self): - m = types.ModuleType('m') - class Test1(unittest.TestCase): - def test(self): - pass - - testcase_1 = Test1('test') - class Foo(unittest.TestCase): - @staticmethod - def foo(): - return testcase_1 - m.Foo = Foo - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['Foo.foo'], m) - self.assertIsInstance(suite, loader.suiteClass) - - ref_suite = unittest.TestSuite([testcase_1]) - self.assertEqual(list(suite), [ref_suite]) - - # "The specifier name is a ``dotted name'' that may resolve ... to - # ... a callable object which returns a TestCase or TestSuite instance" - # - # What happens when the callable returns something else? - def test_loadTestsFromNames__callable__wrong_type(self): - m = types.ModuleType('m') - def return_wrong(): - return 6 - m.return_wrong = return_wrong - - loader = unittest.TestLoader() - try: - loader.loadTestsFromNames(['return_wrong'], m) - except TypeError: - pass - else: - self.fail("TestLoader.loadTestsFromNames failed to raise TypeError") - - # "The specifier can refer to modules and packages which have not been - # imported; they will be imported as a side-effect" - def test_loadTestsFromNames__module_not_loaded(self): - # We're going to try to load this module as a side-effect, so it - # better not be loaded before we try. 
- # - module_name = 'unittest.test.dummy' - sys.modules.pop(module_name, None) - - loader = unittest.TestLoader() - try: - suite = loader.loadTestsFromNames([module_name]) - - self.assertIsInstance(suite, loader.suiteClass) - self.assertEqual(list(suite), [unittest.TestSuite()]) - - # module should now be loaded, thanks to loadTestsFromName() - self.assertIn(module_name, sys.modules) - finally: - if module_name in sys.modules: - del sys.modules[module_name] - - ################################################################ - ### /Tests for TestLoader.loadTestsFromNames() - - ### Tests for TestLoader.getTestCaseNames() - ################################################################ - - # "Return a sorted sequence of method names found within testCaseClass" - # - # Test.foobar is defined to make sure getTestCaseNames() respects - # loader.testMethodPrefix - def test_getTestCaseNames(self): - class Test(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foobar(self): pass - - loader = unittest.TestLoader() - - self.assertEqual(loader.getTestCaseNames(Test), ['test_1', 'test_2']) - - # "Return a sorted sequence of method names found within testCaseClass" - # - # Does getTestCaseNames() behave appropriately if no tests are found? - def test_getTestCaseNames__no_tests(self): - class Test(unittest.TestCase): - def foobar(self): pass - - loader = unittest.TestLoader() - - self.assertEqual(loader.getTestCaseNames(Test), []) - - # "Return a sorted sequence of method names found within testCaseClass" - # - # Are not-TestCases handled gracefully? 
- # - # XXX This should raise a TypeError, not return a list - # - # XXX It's too late in the 2.5 release cycle to fix this, but it should - # probably be revisited for 2.6 - def test_getTestCaseNames__not_a_TestCase(self): - class BadCase(int): - def test_foo(self): - pass - - loader = unittest.TestLoader() - names = loader.getTestCaseNames(BadCase) - - self.assertEqual(names, ['test_foo']) - - # "Return a sorted sequence of method names found within testCaseClass" - # - # Make sure inherited names are handled. - # - # TestP.foobar is defined to make sure getTestCaseNames() respects - # loader.testMethodPrefix - def test_getTestCaseNames__inheritance(self): - class TestP(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foobar(self): pass - - class TestC(TestP): - def test_1(self): pass - def test_3(self): pass - - loader = unittest.TestLoader() - - names = ['test_1', 'test_2', 'test_3'] - self.assertEqual(loader.getTestCaseNames(TestC), names) - - ################################################################ - ### /Tests for TestLoader.getTestCaseNames() - - ### Tests for TestLoader.testMethodPrefix - ################################################################ - - # "String giving the prefix of method names which will be interpreted as - # test methods" - # - # Implicit in the documentation is that testMethodPrefix is respected by - # all loadTestsFrom* methods. 
- def test_testMethodPrefix__loadTestsFromTestCase(self): - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - - tests_1 = unittest.TestSuite([Foo('foo_bar')]) - tests_2 = unittest.TestSuite([Foo('test_1'), Foo('test_2')]) - - loader = unittest.TestLoader() - loader.testMethodPrefix = 'foo' - self.assertEqual(loader.loadTestsFromTestCase(Foo), tests_1) - - loader.testMethodPrefix = 'test' - self.assertEqual(loader.loadTestsFromTestCase(Foo), tests_2) - - # "String giving the prefix of method names which will be interpreted as - # test methods" - # - # Implicit in the documentation is that testMethodPrefix is respected by - # all loadTestsFrom* methods. - def test_testMethodPrefix__loadTestsFromModule(self): - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - m.Foo = Foo - - tests_1 = [unittest.TestSuite([Foo('foo_bar')])] - tests_2 = [unittest.TestSuite([Foo('test_1'), Foo('test_2')])] - - loader = unittest.TestLoader() - loader.testMethodPrefix = 'foo' - self.assertEqual(list(loader.loadTestsFromModule(m)), tests_1) - - loader.testMethodPrefix = 'test' - self.assertEqual(list(loader.loadTestsFromModule(m)), tests_2) - - # "String giving the prefix of method names which will be interpreted as - # test methods" - # - # Implicit in the documentation is that testMethodPrefix is respected by - # all loadTestsFrom* methods. 
- def test_testMethodPrefix__loadTestsFromName(self): - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - m.Foo = Foo - - tests_1 = unittest.TestSuite([Foo('foo_bar')]) - tests_2 = unittest.TestSuite([Foo('test_1'), Foo('test_2')]) - - loader = unittest.TestLoader() - loader.testMethodPrefix = 'foo' - self.assertEqual(loader.loadTestsFromName('Foo', m), tests_1) - - loader.testMethodPrefix = 'test' - self.assertEqual(loader.loadTestsFromName('Foo', m), tests_2) - - # "String giving the prefix of method names which will be interpreted as - # test methods" - # - # Implicit in the documentation is that testMethodPrefix is respected by - # all loadTestsFrom* methods. - def test_testMethodPrefix__loadTestsFromNames(self): - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - m.Foo = Foo - - tests_1 = unittest.TestSuite([unittest.TestSuite([Foo('foo_bar')])]) - tests_2 = unittest.TestSuite([Foo('test_1'), Foo('test_2')]) - tests_2 = unittest.TestSuite([tests_2]) - - loader = unittest.TestLoader() - loader.testMethodPrefix = 'foo' - self.assertEqual(loader.loadTestsFromNames(['Foo'], m), tests_1) - - loader.testMethodPrefix = 'test' - self.assertEqual(loader.loadTestsFromNames(['Foo'], m), tests_2) - - # "The default value is 'test'" - def test_testMethodPrefix__default_value(self): - loader = unittest.TestLoader() - self.assertTrue(loader.testMethodPrefix == 'test') - - ################################################################ - ### /Tests for TestLoader.testMethodPrefix - - ### Tests for TestLoader.sortTestMethodsUsing - ################################################################ - - # "Function to be used to compare method names when sorting them in - # getTestCaseNames() and all the loadTestsFromX() methods" - def test_sortTestMethodsUsing__loadTestsFromTestCase(self): - def 
reversed_cmp(x, y): - return -cmp(x, y) - - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - - loader = unittest.TestLoader() - loader.sortTestMethodsUsing = reversed_cmp - - tests = loader.suiteClass([Foo('test_2'), Foo('test_1')]) - self.assertEqual(loader.loadTestsFromTestCase(Foo), tests) - - # "Function to be used to compare method names when sorting them in - # getTestCaseNames() and all the loadTestsFromX() methods" - def test_sortTestMethodsUsing__loadTestsFromModule(self): - def reversed_cmp(x, y): - return -cmp(x, y) - - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - m.Foo = Foo - - loader = unittest.TestLoader() - loader.sortTestMethodsUsing = reversed_cmp - - tests = [loader.suiteClass([Foo('test_2'), Foo('test_1')])] - self.assertEqual(list(loader.loadTestsFromModule(m)), tests) - - # "Function to be used to compare method names when sorting them in - # getTestCaseNames() and all the loadTestsFromX() methods" - def test_sortTestMethodsUsing__loadTestsFromName(self): - def reversed_cmp(x, y): - return -cmp(x, y) - - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - m.Foo = Foo - - loader = unittest.TestLoader() - loader.sortTestMethodsUsing = reversed_cmp - - tests = loader.suiteClass([Foo('test_2'), Foo('test_1')]) - self.assertEqual(loader.loadTestsFromName('Foo', m), tests) - - # "Function to be used to compare method names when sorting them in - # getTestCaseNames() and all the loadTestsFromX() methods" - def test_sortTestMethodsUsing__loadTestsFromNames(self): - def reversed_cmp(x, y): - return -cmp(x, y) - - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - m.Foo = Foo - - loader = unittest.TestLoader() - loader.sortTestMethodsUsing = reversed_cmp - - tests = [loader.suiteClass([Foo('test_2'), Foo('test_1')])] - 
self.assertEqual(list(loader.loadTestsFromNames(['Foo'], m)), tests) - - # "Function to be used to compare method names when sorting them in - # getTestCaseNames()" - # - # Does it actually affect getTestCaseNames()? - def test_sortTestMethodsUsing__getTestCaseNames(self): - def reversed_cmp(x, y): - return -cmp(x, y) - - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - - loader = unittest.TestLoader() - loader.sortTestMethodsUsing = reversed_cmp - - test_names = ['test_2', 'test_1'] - self.assertEqual(loader.getTestCaseNames(Foo), test_names) - - # "The default value is the built-in cmp() function" - def test_sortTestMethodsUsing__default_value(self): - loader = unittest.TestLoader() - self.assertTrue(loader.sortTestMethodsUsing is cmp) - - # "it can be set to None to disable the sort." - # - # XXX How is this different from reassigning cmp? Are the tests returned - # in a random order or something? This behaviour should die - def test_sortTestMethodsUsing__None(self): - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - - loader = unittest.TestLoader() - loader.sortTestMethodsUsing = None - - test_names = ['test_2', 'test_1'] - self.assertEqual(set(loader.getTestCaseNames(Foo)), set(test_names)) - - ################################################################ - ### /Tests for TestLoader.sortTestMethodsUsing - - ### Tests for TestLoader.suiteClass - ################################################################ - - # "Callable object that constructs a test suite from a list of tests." 
- def test_suiteClass__loadTestsFromTestCase(self): - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - - tests = [Foo('test_1'), Foo('test_2')] - - loader = unittest.TestLoader() - loader.suiteClass = list - self.assertEqual(loader.loadTestsFromTestCase(Foo), tests) - - # It is implicit in the documentation for TestLoader.suiteClass that - # all TestLoader.loadTestsFrom* methods respect it. Let's make sure - def test_suiteClass__loadTestsFromModule(self): - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - m.Foo = Foo - - tests = [[Foo('test_1'), Foo('test_2')]] - - loader = unittest.TestLoader() - loader.suiteClass = list - self.assertEqual(loader.loadTestsFromModule(m), tests) - - # It is implicit in the documentation for TestLoader.suiteClass that - # all TestLoader.loadTestsFrom* methods respect it. Let's make sure - def test_suiteClass__loadTestsFromName(self): - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - m.Foo = Foo - - tests = [Foo('test_1'), Foo('test_2')] - - loader = unittest.TestLoader() - loader.suiteClass = list - self.assertEqual(loader.loadTestsFromName('Foo', m), tests) - - # It is implicit in the documentation for TestLoader.suiteClass that - # all TestLoader.loadTestsFrom* methods respect it. 
Let's make sure - def test_suiteClass__loadTestsFromNames(self): - m = types.ModuleType('m') - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def foo_bar(self): pass - m.Foo = Foo - - tests = [[Foo('test_1'), Foo('test_2')]] - - loader = unittest.TestLoader() - loader.suiteClass = list - self.assertEqual(loader.loadTestsFromNames(['Foo'], m), tests) - - # "The default value is the TestSuite class" - def test_suiteClass__default_value(self): - loader = unittest.TestLoader() - self.assertIs(loader.suiteClass, unittest.TestSuite) - - # Make sure the dotted name resolution works even if the actual - # function doesn't have the same name as is used to find it. - def test_loadTestsFromName__function_with_different_name_than_method(self): - # lambdas have the name ''. - m = types.ModuleType('m') - class MyTestCase(unittest.TestCase): - test = lambda: 1 - m.testcase_1 = MyTestCase - - loader = unittest.TestLoader() - suite = loader.loadTestsFromNames(['testcase_1.test'], m) - self.assertIsInstance(suite, loader.suiteClass) - - ref_suite = unittest.TestSuite([MyTestCase('test')]) - self.assertEqual(list(suite), [ref_suite]) - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_program.py b/python/Lib/unittest/test/test_program.py deleted file mode 100644 index 8eca83d620..0000000000 --- a/python/Lib/unittest/test/test_program.py +++ /dev/null @@ -1,256 +0,0 @@ -from cStringIO import StringIO - -import os -import sys -import unittest -import unittest.test - - -class Test_TestProgram(unittest.TestCase): - - def test_discovery_from_dotted_path(self): - loader = unittest.TestLoader() - - tests = [self] - expectedPath = os.path.abspath(os.path.dirname(unittest.test.__file__)) - - self.wasRun = False - def _find_tests(start_dir, pattern): - self.wasRun = True - self.assertEqual(start_dir, expectedPath) - return tests - loader._find_tests = _find_tests - suite = loader.discover('unittest.test') - 
self.assertTrue(self.wasRun) - self.assertEqual(suite._tests, tests) - - # Horrible white box test - def testNoExit(self): - result = object() - test = object() - - class FakeRunner(object): - def run(self, test): - self.test = test - return result - - runner = FakeRunner() - - oldParseArgs = unittest.TestProgram.parseArgs - def restoreParseArgs(): - unittest.TestProgram.parseArgs = oldParseArgs - unittest.TestProgram.parseArgs = lambda *args: None - self.addCleanup(restoreParseArgs) - - def removeTest(): - del unittest.TestProgram.test - unittest.TestProgram.test = test - self.addCleanup(removeTest) - - program = unittest.TestProgram(testRunner=runner, exit=False, verbosity=2) - - self.assertEqual(program.result, result) - self.assertEqual(runner.test, test) - self.assertEqual(program.verbosity, 2) - - class FooBar(unittest.TestCase): - def testPass(self): - assert True - def testFail(self): - assert False - - class FooBarLoader(unittest.TestLoader): - """Test loader that returns a suite containing FooBar.""" - def loadTestsFromModule(self, module): - return self.suiteClass( - [self.loadTestsFromTestCase(Test_TestProgram.FooBar)]) - - - def test_NonExit(self): - program = unittest.main(exit=False, - argv=["foobar"], - testRunner=unittest.TextTestRunner(stream=StringIO()), - testLoader=self.FooBarLoader()) - self.assertTrue(hasattr(program, 'result')) - - - def test_Exit(self): - self.assertRaises( - SystemExit, - unittest.main, - argv=["foobar"], - testRunner=unittest.TextTestRunner(stream=StringIO()), - exit=True, - testLoader=self.FooBarLoader()) - - - def test_ExitAsDefault(self): - self.assertRaises( - SystemExit, - unittest.main, - argv=["foobar"], - testRunner=unittest.TextTestRunner(stream=StringIO()), - testLoader=self.FooBarLoader()) - - -class InitialisableProgram(unittest.TestProgram): - exit = False - result = None - verbosity = 1 - defaultTest = None - testRunner = None - testLoader = unittest.defaultTestLoader - progName = 'test' - test = 'test' - 
def __init__(self, *args): - pass - -RESULT = object() - -class FakeRunner(object): - initArgs = None - test = None - raiseError = False - - def __init__(self, **kwargs): - FakeRunner.initArgs = kwargs - if FakeRunner.raiseError: - FakeRunner.raiseError = False - raise TypeError - - def run(self, test): - FakeRunner.test = test - return RESULT - -class TestCommandLineArgs(unittest.TestCase): - - def setUp(self): - self.program = InitialisableProgram() - self.program.createTests = lambda: None - FakeRunner.initArgs = None - FakeRunner.test = None - FakeRunner.raiseError = False - - def testHelpAndUnknown(self): - program = self.program - def usageExit(msg=None): - program.msg = msg - program.exit = True - program.usageExit = usageExit - - for opt in '-h', '-H', '--help': - program.exit = False - program.parseArgs([None, opt]) - self.assertTrue(program.exit) - self.assertIsNone(program.msg) - - program.parseArgs([None, '-$']) - self.assertTrue(program.exit) - self.assertIsNotNone(program.msg) - - def testVerbosity(self): - program = self.program - - for opt in '-q', '--quiet': - program.verbosity = 1 - program.parseArgs([None, opt]) - self.assertEqual(program.verbosity, 0) - - for opt in '-v', '--verbose': - program.verbosity = 1 - program.parseArgs([None, opt]) - self.assertEqual(program.verbosity, 2) - - def testBufferCatchFailfast(self): - program = self.program - for arg, attr in (('buffer', 'buffer'), ('failfast', 'failfast'), - ('catch', 'catchbreak')): - if attr == 'catch' and not hasInstallHandler: - continue - - short_opt = '-%s' % arg[0] - long_opt = '--%s' % arg - for opt in short_opt, long_opt: - setattr(program, attr, None) - - program.parseArgs([None, opt]) - self.assertTrue(getattr(program, attr)) - - for opt in short_opt, long_opt: - not_none = object() - setattr(program, attr, not_none) - - program.parseArgs([None, opt]) - self.assertEqual(getattr(program, attr), not_none) - - def testRunTestsRunnerClass(self): - program = self.program - - 
program.testRunner = FakeRunner - program.verbosity = 'verbosity' - program.failfast = 'failfast' - program.buffer = 'buffer' - - program.runTests() - - self.assertEqual(FakeRunner.initArgs, {'verbosity': 'verbosity', - 'failfast': 'failfast', - 'buffer': 'buffer'}) - self.assertEqual(FakeRunner.test, 'test') - self.assertIs(program.result, RESULT) - - def testRunTestsRunnerInstance(self): - program = self.program - - program.testRunner = FakeRunner() - FakeRunner.initArgs = None - - program.runTests() - - # A new FakeRunner should not have been instantiated - self.assertIsNone(FakeRunner.initArgs) - - self.assertEqual(FakeRunner.test, 'test') - self.assertIs(program.result, RESULT) - - def testRunTestsOldRunnerClass(self): - program = self.program - - FakeRunner.raiseError = True - program.testRunner = FakeRunner - program.verbosity = 'verbosity' - program.failfast = 'failfast' - program.buffer = 'buffer' - program.test = 'test' - - program.runTests() - - # If initializing raises a type error it should be retried - # without the new keyword arguments - self.assertEqual(FakeRunner.initArgs, {}) - self.assertEqual(FakeRunner.test, 'test') - self.assertIs(program.result, RESULT) - - def testCatchBreakInstallsHandler(self): - module = sys.modules['unittest.main'] - original = module.installHandler - def restore(): - module.installHandler = original - self.addCleanup(restore) - - self.installed = False - def fakeInstallHandler(): - self.installed = True - module.installHandler = fakeInstallHandler - - program = self.program - program.catchbreak = True - - program.testRunner = FakeRunner - - program.runTests() - self.assertTrue(self.installed) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_result.py b/python/Lib/unittest/test/test_result.py deleted file mode 100644 index 2a596cdb6f..0000000000 --- a/python/Lib/unittest/test/test_result.py +++ /dev/null @@ -1,567 +0,0 @@ -import sys -import textwrap -from StringIO import 
StringIO -from test import test_support - -import traceback -import unittest - - -class Test_TestResult(unittest.TestCase): - # Note: there are not separate tests for TestResult.wasSuccessful(), - # TestResult.errors, TestResult.failures, TestResult.testsRun or - # TestResult.shouldStop because these only have meaning in terms of - # other TestResult methods. - # - # Accordingly, tests for the aforenamed attributes are incorporated - # in with the tests for the defining methods. - ################################################################ - - def test_init(self): - result = unittest.TestResult() - - self.assertTrue(result.wasSuccessful()) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.failures), 0) - self.assertEqual(result.testsRun, 0) - self.assertEqual(result.shouldStop, False) - self.assertIsNone(result._stdout_buffer) - self.assertIsNone(result._stderr_buffer) - - - # "This method can be called to signal that the set of tests being - # run should be aborted by setting the TestResult's shouldStop - # attribute to True." - def test_stop(self): - result = unittest.TestResult() - - result.stop() - - self.assertEqual(result.shouldStop, True) - - # "Called when the test case test is about to be run. The default - # implementation simply increments the instance's testsRun counter." - def test_startTest(self): - class Foo(unittest.TestCase): - def test_1(self): - pass - - test = Foo('test_1') - - result = unittest.TestResult() - - result.startTest(test) - - self.assertTrue(result.wasSuccessful()) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.failures), 0) - self.assertEqual(result.testsRun, 1) - self.assertEqual(result.shouldStop, False) - - result.stopTest(test) - - # "Called after the test case test has been executed, regardless of - # the outcome. The default implementation does nothing." 
- def test_stopTest(self): - class Foo(unittest.TestCase): - def test_1(self): - pass - - test = Foo('test_1') - - result = unittest.TestResult() - - result.startTest(test) - - self.assertTrue(result.wasSuccessful()) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.failures), 0) - self.assertEqual(result.testsRun, 1) - self.assertEqual(result.shouldStop, False) - - result.stopTest(test) - - # Same tests as above; make sure nothing has changed - self.assertTrue(result.wasSuccessful()) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.failures), 0) - self.assertEqual(result.testsRun, 1) - self.assertEqual(result.shouldStop, False) - - # "Called before and after tests are run. The default implementation does nothing." - def test_startTestRun_stopTestRun(self): - result = unittest.TestResult() - result.startTestRun() - result.stopTestRun() - - # "addSuccess(test)" - # ... - # "Called when the test case test succeeds" - # ... - # "wasSuccessful() - Returns True if all tests run so far have passed, - # otherwise returns False" - # ... - # "testsRun - The total number of tests run so far." - # ... - # "errors - A list containing 2-tuples of TestCase instances and - # formatted tracebacks. Each tuple represents a test which raised an - # unexpected exception. Contains formatted - # tracebacks instead of sys.exc_info() results." - # ... - # "failures - A list containing 2-tuples of TestCase instances and - # formatted tracebacks. Each tuple represents a test where a failure was - # explicitly signalled using the TestCase.fail*() or TestCase.assert*() - # methods. Contains formatted tracebacks instead - # of sys.exc_info() results." 
- def test_addSuccess(self): - class Foo(unittest.TestCase): - def test_1(self): - pass - - test = Foo('test_1') - - result = unittest.TestResult() - - result.startTest(test) - result.addSuccess(test) - result.stopTest(test) - - self.assertTrue(result.wasSuccessful()) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.failures), 0) - self.assertEqual(result.testsRun, 1) - self.assertEqual(result.shouldStop, False) - - # "addFailure(test, err)" - # ... - # "Called when the test case test signals a failure. err is a tuple of - # the form returned by sys.exc_info(): (type, value, traceback)" - # ... - # "wasSuccessful() - Returns True if all tests run so far have passed, - # otherwise returns False" - # ... - # "testsRun - The total number of tests run so far." - # ... - # "errors - A list containing 2-tuples of TestCase instances and - # formatted tracebacks. Each tuple represents a test which raised an - # unexpected exception. Contains formatted - # tracebacks instead of sys.exc_info() results." - # ... - # "failures - A list containing 2-tuples of TestCase instances and - # formatted tracebacks. Each tuple represents a test where a failure was - # explicitly signalled using the TestCase.fail*() or TestCase.assert*() - # methods. Contains formatted tracebacks instead - # of sys.exc_info() results." 
- def test_addFailure(self): - class Foo(unittest.TestCase): - def test_1(self): - pass - - test = Foo('test_1') - try: - test.fail("foo") - except: - exc_info_tuple = sys.exc_info() - - result = unittest.TestResult() - - result.startTest(test) - result.addFailure(test, exc_info_tuple) - result.stopTest(test) - - self.assertFalse(result.wasSuccessful()) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.failures), 1) - self.assertEqual(result.testsRun, 1) - self.assertEqual(result.shouldStop, False) - - test_case, formatted_exc = result.failures[0] - self.assertIs(test_case, test) - self.assertIsInstance(formatted_exc, str) - - # "addError(test, err)" - # ... - # "Called when the test case test raises an unexpected exception err - # is a tuple of the form returned by sys.exc_info(): - # (type, value, traceback)" - # ... - # "wasSuccessful() - Returns True if all tests run so far have passed, - # otherwise returns False" - # ... - # "testsRun - The total number of tests run so far." - # ... - # "errors - A list containing 2-tuples of TestCase instances and - # formatted tracebacks. Each tuple represents a test which raised an - # unexpected exception. Contains formatted - # tracebacks instead of sys.exc_info() results." - # ... - # "failures - A list containing 2-tuples of TestCase instances and - # formatted tracebacks. Each tuple represents a test where a failure was - # explicitly signalled using the TestCase.fail*() or TestCase.assert*() - # methods. Contains formatted tracebacks instead - # of sys.exc_info() results." 
- def test_addError(self): - class Foo(unittest.TestCase): - def test_1(self): - pass - - test = Foo('test_1') - try: - raise TypeError() - except: - exc_info_tuple = sys.exc_info() - - result = unittest.TestResult() - - result.startTest(test) - result.addError(test, exc_info_tuple) - result.stopTest(test) - - self.assertFalse(result.wasSuccessful()) - self.assertEqual(len(result.errors), 1) - self.assertEqual(len(result.failures), 0) - self.assertEqual(result.testsRun, 1) - self.assertEqual(result.shouldStop, False) - - test_case, formatted_exc = result.errors[0] - self.assertIs(test_case, test) - self.assertIsInstance(formatted_exc, str) - - def testGetDescriptionWithoutDocstring(self): - result = unittest.TextTestResult(None, True, 1) - self.assertEqual( - result.getDescription(self), - 'testGetDescriptionWithoutDocstring (' + __name__ + - '.Test_TestResult)') - - @unittest.skipIf(sys.flags.optimize >= 2, - "Docstrings are omitted with -O2 and above") - def testGetDescriptionWithOneLineDocstring(self): - """Tests getDescription() for a method with a docstring.""" - result = unittest.TextTestResult(None, True, 1) - self.assertEqual( - result.getDescription(self), - ('testGetDescriptionWithOneLineDocstring ' - '(' + __name__ + '.Test_TestResult)\n' - 'Tests getDescription() for a method with a docstring.')) - - @unittest.skipIf(sys.flags.optimize >= 2, - "Docstrings are omitted with -O2 and above") - def testGetDescriptionWithMultiLineDocstring(self): - """Tests getDescription() for a method with a longer docstring. - The second line of the docstring. 
- """ - result = unittest.TextTestResult(None, True, 1) - self.assertEqual( - result.getDescription(self), - ('testGetDescriptionWithMultiLineDocstring ' - '(' + __name__ + '.Test_TestResult)\n' - 'Tests getDescription() for a method with a longer ' - 'docstring.')) - - def testStackFrameTrimming(self): - class Frame(object): - class tb_frame(object): - f_globals = {} - result = unittest.TestResult() - self.assertFalse(result._is_relevant_tb_level(Frame)) - - Frame.tb_frame.f_globals['__unittest'] = True - self.assertTrue(result._is_relevant_tb_level(Frame)) - - def testFailFast(self): - result = unittest.TestResult() - result._exc_info_to_string = lambda *_: '' - result.failfast = True - result.addError(None, None) - self.assertTrue(result.shouldStop) - - result = unittest.TestResult() - result._exc_info_to_string = lambda *_: '' - result.failfast = True - result.addFailure(None, None) - self.assertTrue(result.shouldStop) - - result = unittest.TestResult() - result._exc_info_to_string = lambda *_: '' - result.failfast = True - result.addUnexpectedSuccess(None) - self.assertTrue(result.shouldStop) - - def testFailFastSetByRunner(self): - runner = unittest.TextTestRunner(stream=StringIO(), failfast=True) - def test(result): - self.assertTrue(result.failfast) - runner.run(test) - - -classDict = dict(unittest.TestResult.__dict__) -for m in ('addSkip', 'addExpectedFailure', 'addUnexpectedSuccess', - '__init__'): - del classDict[m] - -def __init__(self, stream=None, descriptions=None, verbosity=None): - self.failures = [] - self.errors = [] - self.testsRun = 0 - self.shouldStop = False - self.buffer = False - -classDict['__init__'] = __init__ -OldResult = type('OldResult', (object,), classDict) - -class Test_OldTestResult(unittest.TestCase): - - def assertOldResultWarning(self, test, failures): - with test_support.check_warnings(("TestResult has no add.+ method,", - RuntimeWarning)): - result = OldResult() - test.run(result) - self.assertEqual(len(result.failures), 
failures) - - def testOldTestResult(self): - class Test(unittest.TestCase): - def testSkip(self): - self.skipTest('foobar') - @unittest.expectedFailure - def testExpectedFail(self): - raise TypeError - @unittest.expectedFailure - def testUnexpectedSuccess(self): - pass - - for test_name, should_pass in (('testSkip', True), - ('testExpectedFail', True), - ('testUnexpectedSuccess', False)): - test = Test(test_name) - self.assertOldResultWarning(test, int(not should_pass)) - - def testOldTestTesultSetup(self): - class Test(unittest.TestCase): - def setUp(self): - self.skipTest('no reason') - def testFoo(self): - pass - self.assertOldResultWarning(Test('testFoo'), 0) - - def testOldTestResultClass(self): - @unittest.skip('no reason') - class Test(unittest.TestCase): - def testFoo(self): - pass - self.assertOldResultWarning(Test('testFoo'), 0) - - def testOldResultWithRunner(self): - class Test(unittest.TestCase): - def testFoo(self): - pass - runner = unittest.TextTestRunner(resultclass=OldResult, - stream=StringIO()) - # This will raise an exception if TextTestRunner can't handle old - # test result objects - runner.run(Test('testFoo')) - - -class MockTraceback(object): - @staticmethod - def format_exception(*_): - return ['A traceback'] - -def restore_traceback(): - unittest.result.traceback = traceback - - -class TestOutputBuffering(unittest.TestCase): - - def setUp(self): - self._real_out = sys.stdout - self._real_err = sys.stderr - - def tearDown(self): - sys.stdout = self._real_out - sys.stderr = self._real_err - - def testBufferOutputOff(self): - real_out = self._real_out - real_err = self._real_err - - result = unittest.TestResult() - self.assertFalse(result.buffer) - - self.assertIs(real_out, sys.stdout) - self.assertIs(real_err, sys.stderr) - - result.startTest(self) - - self.assertIs(real_out, sys.stdout) - self.assertIs(real_err, sys.stderr) - - def testBufferOutputStartTestAddSuccess(self): - real_out = self._real_out - real_err = self._real_err - - result 
= unittest.TestResult() - self.assertFalse(result.buffer) - - result.buffer = True - - self.assertIs(real_out, sys.stdout) - self.assertIs(real_err, sys.stderr) - - result.startTest(self) - - self.assertIsNot(real_out, sys.stdout) - self.assertIsNot(real_err, sys.stderr) - self.assertIsInstance(sys.stdout, StringIO) - self.assertIsInstance(sys.stderr, StringIO) - self.assertIsNot(sys.stdout, sys.stderr) - - out_stream = sys.stdout - err_stream = sys.stderr - - result._original_stdout = StringIO() - result._original_stderr = StringIO() - - print 'foo' - print >> sys.stderr, 'bar' - - self.assertEqual(out_stream.getvalue(), 'foo\n') - self.assertEqual(err_stream.getvalue(), 'bar\n') - - self.assertEqual(result._original_stdout.getvalue(), '') - self.assertEqual(result._original_stderr.getvalue(), '') - - result.addSuccess(self) - result.stopTest(self) - - self.assertIs(sys.stdout, result._original_stdout) - self.assertIs(sys.stderr, result._original_stderr) - - self.assertEqual(result._original_stdout.getvalue(), '') - self.assertEqual(result._original_stderr.getvalue(), '') - - self.assertEqual(out_stream.getvalue(), '') - self.assertEqual(err_stream.getvalue(), '') - - - def getStartedResult(self): - result = unittest.TestResult() - result.buffer = True - result.startTest(self) - return result - - def testBufferOutputAddErrorOrFailure(self): - unittest.result.traceback = MockTraceback - self.addCleanup(restore_traceback) - - for message_attr, add_attr, include_error in [ - ('errors', 'addError', True), - ('failures', 'addFailure', False), - ('errors', 'addError', True), - ('failures', 'addFailure', False) - ]: - result = self.getStartedResult() - buffered_out = sys.stdout - buffered_err = sys.stderr - result._original_stdout = StringIO() - result._original_stderr = StringIO() - - print >> sys.stdout, 'foo' - if include_error: - print >> sys.stderr, 'bar' - - - addFunction = getattr(result, add_attr) - addFunction(self, (None, None, None)) - result.stopTest(self) - 
- result_list = getattr(result, message_attr) - self.assertEqual(len(result_list), 1) - - test, message = result_list[0] - expectedOutMessage = textwrap.dedent(""" - Stdout: - foo - """) - expectedErrMessage = '' - if include_error: - expectedErrMessage = textwrap.dedent(""" - Stderr: - bar - """) - expectedFullMessage = 'A traceback%s%s' % (expectedOutMessage, expectedErrMessage) - - self.assertIs(test, self) - self.assertEqual(result._original_stdout.getvalue(), expectedOutMessage) - self.assertEqual(result._original_stderr.getvalue(), expectedErrMessage) - self.assertMultiLineEqual(message, expectedFullMessage) - - def testBufferSetupClass(self): - result = unittest.TestResult() - result.buffer = True - - class Foo(unittest.TestCase): - @classmethod - def setUpClass(cls): - 1//0 - def test_foo(self): - pass - suite = unittest.TestSuite([Foo('test_foo')]) - suite(result) - self.assertEqual(len(result.errors), 1) - - def testBufferTearDownClass(self): - result = unittest.TestResult() - result.buffer = True - - class Foo(unittest.TestCase): - @classmethod - def tearDownClass(cls): - 1//0 - def test_foo(self): - pass - suite = unittest.TestSuite([Foo('test_foo')]) - suite(result) - self.assertEqual(len(result.errors), 1) - - def testBufferSetUpModule(self): - result = unittest.TestResult() - result.buffer = True - - class Foo(unittest.TestCase): - def test_foo(self): - pass - class Module(object): - @staticmethod - def setUpModule(): - 1//0 - - Foo.__module__ = 'Module' - sys.modules['Module'] = Module - self.addCleanup(sys.modules.pop, 'Module') - suite = unittest.TestSuite([Foo('test_foo')]) - suite(result) - self.assertEqual(len(result.errors), 1) - - def testBufferTearDownModule(self): - result = unittest.TestResult() - result.buffer = True - - class Foo(unittest.TestCase): - def test_foo(self): - pass - class Module(object): - @staticmethod - def tearDownModule(): - 1//0 - - Foo.__module__ = 'Module' - sys.modules['Module'] = Module - 
self.addCleanup(sys.modules.pop, 'Module') - suite = unittest.TestSuite([Foo('test_foo')]) - suite(result) - self.assertEqual(len(result.errors), 1) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_runner.py b/python/Lib/unittest/test/test_runner.py deleted file mode 100644 index d6d5261429..0000000000 --- a/python/Lib/unittest/test/test_runner.py +++ /dev/null @@ -1,267 +0,0 @@ -import unittest - -from cStringIO import StringIO -import pickle - -from unittest.test.support import (LoggingResult, - ResultWithNoStartTestRunStopTestRun) - - -class TestCleanUp(unittest.TestCase): - - def testCleanUp(self): - class TestableTest(unittest.TestCase): - def testNothing(self): - pass - - test = TestableTest('testNothing') - self.assertEqual(test._cleanups, []) - - cleanups = [] - - def cleanup1(*args, **kwargs): - cleanups.append((1, args, kwargs)) - - def cleanup2(*args, **kwargs): - cleanups.append((2, args, kwargs)) - - test.addCleanup(cleanup1, 1, 2, 3, four='hello', five='goodbye') - test.addCleanup(cleanup2) - - self.assertEqual(test._cleanups, - [(cleanup1, (1, 2, 3), dict(four='hello', five='goodbye')), - (cleanup2, (), {})]) - - result = test.doCleanups() - self.assertTrue(result) - - self.assertEqual(cleanups, [(2, (), {}), (1, (1, 2, 3), - dict(four='hello', five='goodbye'))]) - - def testCleanUpWithErrors(self): - class TestableTest(unittest.TestCase): - def testNothing(self): - pass - - class MockResult(object): - errors = [] - def addError(self, test, exc_info): - self.errors.append((test, exc_info)) - - result = MockResult() - test = TestableTest('testNothing') - test._resultForDoCleanups = result - - exc1 = Exception('foo') - exc2 = Exception('bar') - def cleanup1(): - raise exc1 - - def cleanup2(): - raise exc2 - - test.addCleanup(cleanup1) - test.addCleanup(cleanup2) - - self.assertFalse(test.doCleanups()) - - (test1, (Type1, instance1, _)), (test2, (Type2, instance2, _)) = reversed(MockResult.errors) - 
self.assertEqual((test1, Type1, instance1), (test, Exception, exc1)) - self.assertEqual((test2, Type2, instance2), (test, Exception, exc2)) - - def testCleanupInRun(self): - blowUp = False - ordering = [] - - class TestableTest(unittest.TestCase): - def setUp(self): - ordering.append('setUp') - if blowUp: - raise Exception('foo') - - def testNothing(self): - ordering.append('test') - - def tearDown(self): - ordering.append('tearDown') - - test = TestableTest('testNothing') - - def cleanup1(): - ordering.append('cleanup1') - def cleanup2(): - ordering.append('cleanup2') - test.addCleanup(cleanup1) - test.addCleanup(cleanup2) - - def success(some_test): - self.assertEqual(some_test, test) - ordering.append('success') - - result = unittest.TestResult() - result.addSuccess = success - - test.run(result) - self.assertEqual(ordering, ['setUp', 'test', 'tearDown', - 'cleanup2', 'cleanup1', 'success']) - - blowUp = True - ordering = [] - test = TestableTest('testNothing') - test.addCleanup(cleanup1) - test.run(result) - self.assertEqual(ordering, ['setUp', 'cleanup1']) - - def testTestCaseDebugExecutesCleanups(self): - ordering = [] - - class TestableTest(unittest.TestCase): - def setUp(self): - ordering.append('setUp') - self.addCleanup(cleanup1) - - def testNothing(self): - ordering.append('test') - - def tearDown(self): - ordering.append('tearDown') - - test = TestableTest('testNothing') - - def cleanup1(): - ordering.append('cleanup1') - test.addCleanup(cleanup2) - def cleanup2(): - ordering.append('cleanup2') - - test.debug() - self.assertEqual(ordering, ['setUp', 'test', 'tearDown', 'cleanup1', 'cleanup2']) - - -class Test_TextTestRunner(unittest.TestCase): - """Tests for TextTestRunner.""" - - def test_init(self): - runner = unittest.TextTestRunner() - self.assertFalse(runner.failfast) - self.assertFalse(runner.buffer) - self.assertEqual(runner.verbosity, 1) - self.assertTrue(runner.descriptions) - self.assertEqual(runner.resultclass, unittest.TextTestResult) - - - 
def test_multiple_inheritance(self): - class AResult(unittest.TestResult): - def __init__(self, stream, descriptions, verbosity): - super(AResult, self).__init__(stream, descriptions, verbosity) - - class ATextResult(unittest.TextTestResult, AResult): - pass - - # This used to raise an exception due to TextTestResult not passing - # on arguments in its __init__ super call - ATextResult(None, None, 1) - - - def testBufferAndFailfast(self): - class Test(unittest.TestCase): - def testFoo(self): - pass - result = unittest.TestResult() - runner = unittest.TextTestRunner(stream=StringIO(), failfast=True, - buffer=True) - # Use our result object - runner._makeResult = lambda: result - runner.run(Test('testFoo')) - - self.assertTrue(result.failfast) - self.assertTrue(result.buffer) - - def testRunnerRegistersResult(self): - class Test(unittest.TestCase): - def testFoo(self): - pass - originalRegisterResult = unittest.runner.registerResult - def cleanup(): - unittest.runner.registerResult = originalRegisterResult - self.addCleanup(cleanup) - - result = unittest.TestResult() - runner = unittest.TextTestRunner(stream=StringIO()) - # Use our result object - runner._makeResult = lambda: result - - self.wasRegistered = 0 - def fakeRegisterResult(thisResult): - self.wasRegistered += 1 - self.assertEqual(thisResult, result) - unittest.runner.registerResult = fakeRegisterResult - - runner.run(unittest.TestSuite()) - self.assertEqual(self.wasRegistered, 1) - - def test_works_with_result_without_startTestRun_stopTestRun(self): - class OldTextResult(ResultWithNoStartTestRunStopTestRun): - separator2 = '' - def printErrors(self): - pass - - class Runner(unittest.TextTestRunner): - def __init__(self): - super(Runner, self).__init__(StringIO()) - - def _makeResult(self): - return OldTextResult() - - runner = Runner() - runner.run(unittest.TestSuite()) - - def test_startTestRun_stopTestRun_called(self): - class LoggingTextResult(LoggingResult): - separator2 = '' - def printErrors(self): - 
pass - - class LoggingRunner(unittest.TextTestRunner): - def __init__(self, events): - super(LoggingRunner, self).__init__(StringIO()) - self._events = events - - def _makeResult(self): - return LoggingTextResult(self._events) - - events = [] - runner = LoggingRunner(events) - runner.run(unittest.TestSuite()) - expected = ['startTestRun', 'stopTestRun'] - self.assertEqual(events, expected) - - def test_pickle_unpickle(self): - # Issue #7197: a TextTestRunner should be (un)pickleable. This is - # required by test_multiprocessing under Windows (in verbose mode). - from StringIO import StringIO as PickleableIO - # cStringIO objects are not pickleable, but StringIO objects are. - stream = PickleableIO("foo") - runner = unittest.TextTestRunner(stream) - for protocol in range(pickle.HIGHEST_PROTOCOL + 1): - s = pickle.dumps(runner, protocol=protocol) - obj = pickle.loads(s) - # StringIO objects never compare equal, a cheap test instead. - self.assertEqual(obj.stream.getvalue(), stream.getvalue()) - - def test_resultclass(self): - def MockResultClass(*args): - return args - STREAM = object() - DESCRIPTIONS = object() - VERBOSITY = object() - runner = unittest.TextTestRunner(STREAM, DESCRIPTIONS, VERBOSITY, - resultclass=MockResultClass) - self.assertEqual(runner.resultclass, MockResultClass) - - expectedresult = (runner.stream, DESCRIPTIONS, VERBOSITY) - self.assertEqual(runner._makeResult(), expectedresult) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_setups.py b/python/Lib/unittest/test/test_setups.py deleted file mode 100644 index 9456819ea1..0000000000 --- a/python/Lib/unittest/test/test_setups.py +++ /dev/null @@ -1,508 +0,0 @@ -import sys - -from cStringIO import StringIO - -import unittest - - -def resultFactory(*_): - return unittest.TestResult() - - -class TestSetups(unittest.TestCase): - - def getRunner(self): - return unittest.TextTestRunner(resultclass=resultFactory, - stream=StringIO()) - def runTests(self, 
*cases): - suite = unittest.TestSuite() - for case in cases: - tests = unittest.defaultTestLoader.loadTestsFromTestCase(case) - suite.addTests(tests) - - runner = self.getRunner() - - # creating a nested suite exposes some potential bugs - realSuite = unittest.TestSuite() - realSuite.addTest(suite) - # adding empty suites to the end exposes potential bugs - suite.addTest(unittest.TestSuite()) - realSuite.addTest(unittest.TestSuite()) - return runner.run(realSuite) - - def test_setup_class(self): - class Test(unittest.TestCase): - setUpCalled = 0 - @classmethod - def setUpClass(cls): - Test.setUpCalled += 1 - unittest.TestCase.setUpClass() - def test_one(self): - pass - def test_two(self): - pass - - result = self.runTests(Test) - - self.assertEqual(Test.setUpCalled, 1) - self.assertEqual(result.testsRun, 2) - self.assertEqual(len(result.errors), 0) - - def test_teardown_class(self): - class Test(unittest.TestCase): - tearDownCalled = 0 - @classmethod - def tearDownClass(cls): - Test.tearDownCalled += 1 - unittest.TestCase.tearDownClass() - def test_one(self): - pass - def test_two(self): - pass - - result = self.runTests(Test) - - self.assertEqual(Test.tearDownCalled, 1) - self.assertEqual(result.testsRun, 2) - self.assertEqual(len(result.errors), 0) - - def test_teardown_class_two_classes(self): - class Test(unittest.TestCase): - tearDownCalled = 0 - @classmethod - def tearDownClass(cls): - Test.tearDownCalled += 1 - unittest.TestCase.tearDownClass() - def test_one(self): - pass - def test_two(self): - pass - - class Test2(unittest.TestCase): - tearDownCalled = 0 - @classmethod - def tearDownClass(cls): - Test2.tearDownCalled += 1 - unittest.TestCase.tearDownClass() - def test_one(self): - pass - def test_two(self): - pass - - result = self.runTests(Test, Test2) - - self.assertEqual(Test.tearDownCalled, 1) - self.assertEqual(Test2.tearDownCalled, 1) - self.assertEqual(result.testsRun, 4) - self.assertEqual(len(result.errors), 0) - - def 
test_error_in_setupclass(self): - class BrokenTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - raise TypeError('foo') - def test_one(self): - pass - def test_two(self): - pass - - result = self.runTests(BrokenTest) - - self.assertEqual(result.testsRun, 0) - self.assertEqual(len(result.errors), 1) - error, _ = result.errors[0] - self.assertEqual(str(error), - 'setUpClass (%s.BrokenTest)' % __name__) - - def test_error_in_teardown_class(self): - class Test(unittest.TestCase): - tornDown = 0 - @classmethod - def tearDownClass(cls): - Test.tornDown += 1 - raise TypeError('foo') - def test_one(self): - pass - def test_two(self): - pass - - class Test2(unittest.TestCase): - tornDown = 0 - @classmethod - def tearDownClass(cls): - Test2.tornDown += 1 - raise TypeError('foo') - def test_one(self): - pass - def test_two(self): - pass - - result = self.runTests(Test, Test2) - self.assertEqual(result.testsRun, 4) - self.assertEqual(len(result.errors), 2) - self.assertEqual(Test.tornDown, 1) - self.assertEqual(Test2.tornDown, 1) - - error, _ = result.errors[0] - self.assertEqual(str(error), - 'tearDownClass (%s.Test)' % __name__) - - def test_class_not_torndown_when_setup_fails(self): - class Test(unittest.TestCase): - tornDown = False - @classmethod - def setUpClass(cls): - raise TypeError - @classmethod - def tearDownClass(cls): - Test.tornDown = True - raise TypeError('foo') - def test_one(self): - pass - - self.runTests(Test) - self.assertFalse(Test.tornDown) - - def test_class_not_setup_or_torndown_when_skipped(self): - class Test(unittest.TestCase): - classSetUp = False - tornDown = False - @classmethod - def setUpClass(cls): - Test.classSetUp = True - @classmethod - def tearDownClass(cls): - Test.tornDown = True - def test_one(self): - pass - - Test = unittest.skip("hop")(Test) - self.runTests(Test) - self.assertFalse(Test.classSetUp) - self.assertFalse(Test.tornDown) - - def test_setup_teardown_order_with_pathological_suite(self): - results = [] - - 
class Module1(object): - @staticmethod - def setUpModule(): - results.append('Module1.setUpModule') - @staticmethod - def tearDownModule(): - results.append('Module1.tearDownModule') - - class Module2(object): - @staticmethod - def setUpModule(): - results.append('Module2.setUpModule') - @staticmethod - def tearDownModule(): - results.append('Module2.tearDownModule') - - class Test1(unittest.TestCase): - @classmethod - def setUpClass(cls): - results.append('setup 1') - @classmethod - def tearDownClass(cls): - results.append('teardown 1') - def testOne(self): - results.append('Test1.testOne') - def testTwo(self): - results.append('Test1.testTwo') - - class Test2(unittest.TestCase): - @classmethod - def setUpClass(cls): - results.append('setup 2') - @classmethod - def tearDownClass(cls): - results.append('teardown 2') - def testOne(self): - results.append('Test2.testOne') - def testTwo(self): - results.append('Test2.testTwo') - - class Test3(unittest.TestCase): - @classmethod - def setUpClass(cls): - results.append('setup 3') - @classmethod - def tearDownClass(cls): - results.append('teardown 3') - def testOne(self): - results.append('Test3.testOne') - def testTwo(self): - results.append('Test3.testTwo') - - Test1.__module__ = Test2.__module__ = 'Module' - Test3.__module__ = 'Module2' - sys.modules['Module'] = Module1 - sys.modules['Module2'] = Module2 - - first = unittest.TestSuite((Test1('testOne'),)) - second = unittest.TestSuite((Test1('testTwo'),)) - third = unittest.TestSuite((Test2('testOne'),)) - fourth = unittest.TestSuite((Test2('testTwo'),)) - fifth = unittest.TestSuite((Test3('testOne'),)) - sixth = unittest.TestSuite((Test3('testTwo'),)) - suite = unittest.TestSuite((first, second, third, fourth, fifth, sixth)) - - runner = self.getRunner() - result = runner.run(suite) - self.assertEqual(result.testsRun, 6) - self.assertEqual(len(result.errors), 0) - - self.assertEqual(results, - ['Module1.setUpModule', 'setup 1', - 'Test1.testOne', 'Test1.testTwo', 
'teardown 1', - 'setup 2', 'Test2.testOne', 'Test2.testTwo', - 'teardown 2', 'Module1.tearDownModule', - 'Module2.setUpModule', 'setup 3', - 'Test3.testOne', 'Test3.testTwo', - 'teardown 3', 'Module2.tearDownModule']) - - def test_setup_module(self): - class Module(object): - moduleSetup = 0 - @staticmethod - def setUpModule(): - Module.moduleSetup += 1 - - class Test(unittest.TestCase): - def test_one(self): - pass - def test_two(self): - pass - Test.__module__ = 'Module' - sys.modules['Module'] = Module - - result = self.runTests(Test) - self.assertEqual(Module.moduleSetup, 1) - self.assertEqual(result.testsRun, 2) - self.assertEqual(len(result.errors), 0) - - def test_error_in_setup_module(self): - class Module(object): - moduleSetup = 0 - moduleTornDown = 0 - @staticmethod - def setUpModule(): - Module.moduleSetup += 1 - raise TypeError('foo') - @staticmethod - def tearDownModule(): - Module.moduleTornDown += 1 - - class Test(unittest.TestCase): - classSetUp = False - classTornDown = False - @classmethod - def setUpClass(cls): - Test.classSetUp = True - @classmethod - def tearDownClass(cls): - Test.classTornDown = True - def test_one(self): - pass - def test_two(self): - pass - - class Test2(unittest.TestCase): - def test_one(self): - pass - def test_two(self): - pass - Test.__module__ = 'Module' - Test2.__module__ = 'Module' - sys.modules['Module'] = Module - - result = self.runTests(Test, Test2) - self.assertEqual(Module.moduleSetup, 1) - self.assertEqual(Module.moduleTornDown, 0) - self.assertEqual(result.testsRun, 0) - self.assertFalse(Test.classSetUp) - self.assertFalse(Test.classTornDown) - self.assertEqual(len(result.errors), 1) - error, _ = result.errors[0] - self.assertEqual(str(error), 'setUpModule (Module)') - - def test_testcase_with_missing_module(self): - class Test(unittest.TestCase): - def test_one(self): - pass - def test_two(self): - pass - Test.__module__ = 'Module' - sys.modules.pop('Module', None) - - result = self.runTests(Test) - 
self.assertEqual(result.testsRun, 2) - - def test_teardown_module(self): - class Module(object): - moduleTornDown = 0 - @staticmethod - def tearDownModule(): - Module.moduleTornDown += 1 - - class Test(unittest.TestCase): - def test_one(self): - pass - def test_two(self): - pass - Test.__module__ = 'Module' - sys.modules['Module'] = Module - - result = self.runTests(Test) - self.assertEqual(Module.moduleTornDown, 1) - self.assertEqual(result.testsRun, 2) - self.assertEqual(len(result.errors), 0) - - def test_error_in_teardown_module(self): - class Module(object): - moduleTornDown = 0 - @staticmethod - def tearDownModule(): - Module.moduleTornDown += 1 - raise TypeError('foo') - - class Test(unittest.TestCase): - classSetUp = False - classTornDown = False - @classmethod - def setUpClass(cls): - Test.classSetUp = True - @classmethod - def tearDownClass(cls): - Test.classTornDown = True - def test_one(self): - pass - def test_two(self): - pass - - class Test2(unittest.TestCase): - def test_one(self): - pass - def test_two(self): - pass - Test.__module__ = 'Module' - Test2.__module__ = 'Module' - sys.modules['Module'] = Module - - result = self.runTests(Test, Test2) - self.assertEqual(Module.moduleTornDown, 1) - self.assertEqual(result.testsRun, 4) - self.assertTrue(Test.classSetUp) - self.assertTrue(Test.classTornDown) - self.assertEqual(len(result.errors), 1) - error, _ = result.errors[0] - self.assertEqual(str(error), 'tearDownModule (Module)') - - def test_skiptest_in_setupclass(self): - class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - raise unittest.SkipTest('foo') - def test_one(self): - pass - def test_two(self): - pass - - result = self.runTests(Test) - self.assertEqual(result.testsRun, 0) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.skipped), 1) - skipped = result.skipped[0][0] - self.assertEqual(str(skipped), 'setUpClass (%s.Test)' % __name__) - - def test_skiptest_in_setupmodule(self): - class 
Test(unittest.TestCase): - def test_one(self): - pass - def test_two(self): - pass - - class Module(object): - @staticmethod - def setUpModule(): - raise unittest.SkipTest('foo') - - Test.__module__ = 'Module' - sys.modules['Module'] = Module - - result = self.runTests(Test) - self.assertEqual(result.testsRun, 0) - self.assertEqual(len(result.errors), 0) - self.assertEqual(len(result.skipped), 1) - skipped = result.skipped[0][0] - self.assertEqual(str(skipped), 'setUpModule (Module)') - - def test_suite_debug_executes_setups_and_teardowns(self): - ordering = [] - - class Module(object): - @staticmethod - def setUpModule(): - ordering.append('setUpModule') - @staticmethod - def tearDownModule(): - ordering.append('tearDownModule') - - class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - ordering.append('setUpClass') - @classmethod - def tearDownClass(cls): - ordering.append('tearDownClass') - def test_something(self): - ordering.append('test_something') - - Test.__module__ = 'Module' - sys.modules['Module'] = Module - - suite = unittest.defaultTestLoader.loadTestsFromTestCase(Test) - suite.debug() - expectedOrder = ['setUpModule', 'setUpClass', 'test_something', 'tearDownClass', 'tearDownModule'] - self.assertEqual(ordering, expectedOrder) - - def test_suite_debug_propagates_exceptions(self): - class Module(object): - @staticmethod - def setUpModule(): - if phase == 0: - raise Exception('setUpModule') - @staticmethod - def tearDownModule(): - if phase == 1: - raise Exception('tearDownModule') - - class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - if phase == 2: - raise Exception('setUpClass') - @classmethod - def tearDownClass(cls): - if phase == 3: - raise Exception('tearDownClass') - def test_something(self): - if phase == 4: - raise Exception('test_something') - - Test.__module__ = 'Module' - sys.modules['Module'] = Module - - _suite = unittest.defaultTestLoader.loadTestsFromTestCase(Test) - suite = unittest.TestSuite() - 
suite.addTest(_suite) - - messages = ('setUpModule', 'tearDownModule', 'setUpClass', 'tearDownClass', 'test_something') - for phase, msg in enumerate(messages): - with self.assertRaisesRegexp(Exception, msg): - suite.debug() - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_skipping.py b/python/Lib/unittest/test/test_skipping.py deleted file mode 100644 index 6312231491..0000000000 --- a/python/Lib/unittest/test/test_skipping.py +++ /dev/null @@ -1,168 +0,0 @@ -import unittest - -from unittest.test.support import LoggingResult - - -class Test_TestSkipping(unittest.TestCase): - - def test_skipping(self): - class Foo(unittest.TestCase): - def test_skip_me(self): - self.skipTest("skip") - events = [] - result = LoggingResult(events) - test = Foo("test_skip_me") - test.run(result) - self.assertEqual(events, ['startTest', 'addSkip', 'stopTest']) - self.assertEqual(result.skipped, [(test, "skip")]) - - # Try letting setUp skip the test now. - class Foo(unittest.TestCase): - def setUp(self): - self.skipTest("testing") - def test_nothing(self): pass - events = [] - result = LoggingResult(events) - test = Foo("test_nothing") - test.run(result) - self.assertEqual(events, ['startTest', 'addSkip', 'stopTest']) - self.assertEqual(result.skipped, [(test, "testing")]) - self.assertEqual(result.testsRun, 1) - - def test_skipping_decorators(self): - op_table = ((unittest.skipUnless, False, True), - (unittest.skipIf, True, False)) - for deco, do_skip, dont_skip in op_table: - class Foo(unittest.TestCase): - @deco(do_skip, "testing") - def test_skip(self): pass - - @deco(dont_skip, "testing") - def test_dont_skip(self): pass - test_do_skip = Foo("test_skip") - test_dont_skip = Foo("test_dont_skip") - suite = unittest.TestSuite([test_do_skip, test_dont_skip]) - events = [] - result = LoggingResult(events) - suite.run(result) - self.assertEqual(len(result.skipped), 1) - expected = ['startTest', 'addSkip', 'stopTest', - 'startTest', 'addSuccess', 
'stopTest'] - self.assertEqual(events, expected) - self.assertEqual(result.testsRun, 2) - self.assertEqual(result.skipped, [(test_do_skip, "testing")]) - self.assertTrue(result.wasSuccessful()) - - def test_skip_class(self): - @unittest.skip("testing") - class Foo(unittest.TestCase): - def test_1(self): - record.append(1) - record = [] - result = unittest.TestResult() - test = Foo("test_1") - suite = unittest.TestSuite([test]) - suite.run(result) - self.assertEqual(result.skipped, [(test, "testing")]) - self.assertEqual(record, []) - - def test_skip_non_unittest_class_old_style(self): - @unittest.skip("testing") - class Mixin: - def test_1(self): - record.append(1) - class Foo(Mixin, unittest.TestCase): - pass - record = [] - result = unittest.TestResult() - test = Foo("test_1") - suite = unittest.TestSuite([test]) - suite.run(result) - self.assertEqual(result.skipped, [(test, "testing")]) - self.assertEqual(record, []) - - def test_skip_non_unittest_class_new_style(self): - @unittest.skip("testing") - class Mixin(object): - def test_1(self): - record.append(1) - class Foo(Mixin, unittest.TestCase): - pass - record = [] - result = unittest.TestResult() - test = Foo("test_1") - suite = unittest.TestSuite([test]) - suite.run(result) - self.assertEqual(result.skipped, [(test, "testing")]) - self.assertEqual(record, []) - - def test_expected_failure(self): - class Foo(unittest.TestCase): - @unittest.expectedFailure - def test_die(self): - self.fail("help me!") - events = [] - result = LoggingResult(events) - test = Foo("test_die") - test.run(result) - self.assertEqual(events, - ['startTest', 'addExpectedFailure', 'stopTest']) - self.assertEqual(result.expectedFailures[0][0], test) - self.assertTrue(result.wasSuccessful()) - - def test_unexpected_success(self): - class Foo(unittest.TestCase): - @unittest.expectedFailure - def test_die(self): - pass - events = [] - result = LoggingResult(events) - test = Foo("test_die") - test.run(result) - self.assertEqual(events, - 
['startTest', 'addUnexpectedSuccess', 'stopTest']) - self.assertFalse(result.failures) - self.assertEqual(result.unexpectedSuccesses, [test]) - self.assertTrue(result.wasSuccessful()) - - def test_skip_doesnt_run_setup(self): - class Foo(unittest.TestCase): - wasSetUp = False - wasTornDown = False - def setUp(self): - Foo.wasSetUp = True - def tornDown(self): - Foo.wasTornDown = True - @unittest.skip('testing') - def test_1(self): - pass - - result = unittest.TestResult() - test = Foo("test_1") - suite = unittest.TestSuite([test]) - suite.run(result) - self.assertEqual(result.skipped, [(test, "testing")]) - self.assertFalse(Foo.wasSetUp) - self.assertFalse(Foo.wasTornDown) - - def test_decorated_skip(self): - def decorator(func): - def inner(*a): - return func(*a) - return inner - - class Foo(unittest.TestCase): - @decorator - @unittest.skip('testing') - def test_1(self): - pass - - result = unittest.TestResult() - test = Foo("test_1") - suite = unittest.TestSuite([test]) - suite.run(result) - self.assertEqual(result.skipped, [(test, "testing")]) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/test/test_suite.py b/python/Lib/unittest/test/test_suite.py deleted file mode 100644 index 65b5b1452e..0000000000 --- a/python/Lib/unittest/test/test_suite.py +++ /dev/null @@ -1,367 +0,0 @@ -import unittest - -import sys -from unittest.test.support import LoggingResult, TestEquality - - -### Support code for Test_TestSuite -################################################################ - -class Test(object): - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - def test_3(self): pass - def runTest(self): pass - -def _mk_TestSuite(*names): - return unittest.TestSuite(Test.Foo(n) for n in names) - -################################################################ - - -class Test_TestSuite(unittest.TestCase, TestEquality): - - ### Set up attributes needed by inherited tests - 
################################################################ - - # Used by TestEquality.test_eq - eq_pairs = [(unittest.TestSuite(), unittest.TestSuite()), - (unittest.TestSuite(), unittest.TestSuite([])), - (_mk_TestSuite('test_1'), _mk_TestSuite('test_1'))] - - # Used by TestEquality.test_ne - ne_pairs = [(unittest.TestSuite(), _mk_TestSuite('test_1')), - (unittest.TestSuite([]), _mk_TestSuite('test_1')), - (_mk_TestSuite('test_1', 'test_2'), _mk_TestSuite('test_1', 'test_3')), - (_mk_TestSuite('test_1'), _mk_TestSuite('test_2'))] - - ################################################################ - ### /Set up attributes needed by inherited tests - - ### Tests for TestSuite.__init__ - ################################################################ - - # "class TestSuite([tests])" - # - # The tests iterable should be optional - def test_init__tests_optional(self): - suite = unittest.TestSuite() - - self.assertEqual(suite.countTestCases(), 0) - - # "class TestSuite([tests])" - # ... - # "If tests is given, it must be an iterable of individual test cases - # or other test suites that will be used to build the suite initially" - # - # TestSuite should deal with empty tests iterables by allowing the - # creation of an empty suite - def test_init__empty_tests(self): - suite = unittest.TestSuite([]) - - self.assertEqual(suite.countTestCases(), 0) - - # "class TestSuite([tests])" - # ... 
- # "If tests is given, it must be an iterable of individual test cases - # or other test suites that will be used to build the suite initially" - # - # TestSuite should allow any iterable to provide tests - def test_init__tests_from_any_iterable(self): - def tests(): - yield unittest.FunctionTestCase(lambda: None) - yield unittest.FunctionTestCase(lambda: None) - - suite_1 = unittest.TestSuite(tests()) - self.assertEqual(suite_1.countTestCases(), 2) - - suite_2 = unittest.TestSuite(suite_1) - self.assertEqual(suite_2.countTestCases(), 2) - - suite_3 = unittest.TestSuite(set(suite_1)) - self.assertEqual(suite_3.countTestCases(), 2) - - # "class TestSuite([tests])" - # ... - # "If tests is given, it must be an iterable of individual test cases - # or other test suites that will be used to build the suite initially" - # - # Does TestSuite() also allow other TestSuite() instances to be present - # in the tests iterable? - def test_init__TestSuite_instances_in_tests(self): - def tests(): - ftc = unittest.FunctionTestCase(lambda: None) - yield unittest.TestSuite([ftc]) - yield unittest.FunctionTestCase(lambda: None) - - suite = unittest.TestSuite(tests()) - self.assertEqual(suite.countTestCases(), 2) - - ################################################################ - ### /Tests for TestSuite.__init__ - - # Container types should support the iter protocol - def test_iter(self): - test1 = unittest.FunctionTestCase(lambda: None) - test2 = unittest.FunctionTestCase(lambda: None) - suite = unittest.TestSuite((test1, test2)) - - self.assertEqual(list(suite), [test1, test2]) - - # "Return the number of tests represented by the this test object. - # ...this method is also implemented by the TestSuite class, which can - # return larger [greater than 1] values" - # - # Presumably an empty TestSuite returns 0? 
- def test_countTestCases_zero_simple(self): - suite = unittest.TestSuite() - - self.assertEqual(suite.countTestCases(), 0) - - # "Return the number of tests represented by the this test object. - # ...this method is also implemented by the TestSuite class, which can - # return larger [greater than 1] values" - # - # Presumably an empty TestSuite (even if it contains other empty - # TestSuite instances) returns 0? - def test_countTestCases_zero_nested(self): - class Test1(unittest.TestCase): - def test(self): - pass - - suite = unittest.TestSuite([unittest.TestSuite()]) - - self.assertEqual(suite.countTestCases(), 0) - - # "Return the number of tests represented by the this test object. - # ...this method is also implemented by the TestSuite class, which can - # return larger [greater than 1] values" - def test_countTestCases_simple(self): - test1 = unittest.FunctionTestCase(lambda: None) - test2 = unittest.FunctionTestCase(lambda: None) - suite = unittest.TestSuite((test1, test2)) - - self.assertEqual(suite.countTestCases(), 2) - - # "Return the number of tests represented by the this test object. - # ...this method is also implemented by the TestSuite class, which can - # return larger [greater than 1] values" - # - # Make sure this holds for nested TestSuite instances, too - def test_countTestCases_nested(self): - class Test1(unittest.TestCase): - def test1(self): pass - def test2(self): pass - - test2 = unittest.FunctionTestCase(lambda: None) - test3 = unittest.FunctionTestCase(lambda: None) - child = unittest.TestSuite((Test1('test2'), test2)) - parent = unittest.TestSuite((test3, child, Test1('test1'))) - - self.assertEqual(parent.countTestCases(), 4) - - # "Run the tests associated with this suite, collecting the result into - # the test result object passed as result." - # - # And if there are no tests? What then? 
- def test_run__empty_suite(self): - events = [] - result = LoggingResult(events) - - suite = unittest.TestSuite() - - suite.run(result) - - self.assertEqual(events, []) - - # "Note that unlike TestCase.run(), TestSuite.run() requires the - # "result object to be passed in." - def test_run__requires_result(self): - suite = unittest.TestSuite() - - try: - suite.run() - except TypeError: - pass - else: - self.fail("Failed to raise TypeError") - - # "Run the tests associated with this suite, collecting the result into - # the test result object passed as result." - def test_run(self): - events = [] - result = LoggingResult(events) - - class LoggingCase(unittest.TestCase): - def run(self, result): - events.append('run %s' % self._testMethodName) - - def test1(self): pass - def test2(self): pass - - tests = [LoggingCase('test1'), LoggingCase('test2')] - - unittest.TestSuite(tests).run(result) - - self.assertEqual(events, ['run test1', 'run test2']) - - # "Add a TestCase ... to the suite" - def test_addTest__TestCase(self): - class Foo(unittest.TestCase): - def test(self): pass - - test = Foo('test') - suite = unittest.TestSuite() - - suite.addTest(test) - - self.assertEqual(suite.countTestCases(), 1) - self.assertEqual(list(suite), [test]) - - # "Add a ... TestSuite to the suite" - def test_addTest__TestSuite(self): - class Foo(unittest.TestCase): - def test(self): pass - - suite_2 = unittest.TestSuite([Foo('test')]) - - suite = unittest.TestSuite() - suite.addTest(suite_2) - - self.assertEqual(suite.countTestCases(), 1) - self.assertEqual(list(suite), [suite_2]) - - # "Add all the tests from an iterable of TestCase and TestSuite - # instances to this test suite." 
- # - # "This is equivalent to iterating over tests, calling addTest() for - # each element" - def test_addTests(self): - class Foo(unittest.TestCase): - def test_1(self): pass - def test_2(self): pass - - test_1 = Foo('test_1') - test_2 = Foo('test_2') - inner_suite = unittest.TestSuite([test_2]) - - def gen(): - yield test_1 - yield test_2 - yield inner_suite - - suite_1 = unittest.TestSuite() - suite_1.addTests(gen()) - - self.assertEqual(list(suite_1), list(gen())) - - # "This is equivalent to iterating over tests, calling addTest() for - # each element" - suite_2 = unittest.TestSuite() - for t in gen(): - suite_2.addTest(t) - - self.assertEqual(suite_1, suite_2) - - # "Add all the tests from an iterable of TestCase and TestSuite - # instances to this test suite." - # - # What happens if it doesn't get an iterable? - def test_addTest__noniterable(self): - suite = unittest.TestSuite() - - try: - suite.addTests(5) - except TypeError: - pass - else: - self.fail("Failed to raise TypeError") - - def test_addTest__noncallable(self): - suite = unittest.TestSuite() - self.assertRaises(TypeError, suite.addTest, 5) - - def test_addTest__casesuiteclass(self): - suite = unittest.TestSuite() - self.assertRaises(TypeError, suite.addTest, Test_TestSuite) - self.assertRaises(TypeError, suite.addTest, unittest.TestSuite) - - def test_addTests__string(self): - suite = unittest.TestSuite() - self.assertRaises(TypeError, suite.addTests, "foo") - - def test_function_in_suite(self): - def f(_): - pass - suite = unittest.TestSuite() - suite.addTest(f) - - # when the bug is fixed this line will not crash - suite.run(unittest.TestResult()) - - - - def test_basetestsuite(self): - class Test(unittest.TestCase): - wasSetUp = False - wasTornDown = False - @classmethod - def setUpClass(cls): - cls.wasSetUp = True - @classmethod - def tearDownClass(cls): - cls.wasTornDown = True - def testPass(self): - pass - def testFail(self): - fail - class Module(object): - wasSetUp = False - wasTornDown 
= False - @staticmethod - def setUpModule(): - Module.wasSetUp = True - @staticmethod - def tearDownModule(): - Module.wasTornDown = True - - Test.__module__ = 'Module' - sys.modules['Module'] = Module - self.addCleanup(sys.modules.pop, 'Module') - - suite = unittest.BaseTestSuite() - suite.addTests([Test('testPass'), Test('testFail')]) - self.assertEqual(suite.countTestCases(), 2) - - result = unittest.TestResult() - suite.run(result) - self.assertFalse(Module.wasSetUp) - self.assertFalse(Module.wasTornDown) - self.assertFalse(Test.wasSetUp) - self.assertFalse(Test.wasTornDown) - self.assertEqual(len(result.errors), 1) - self.assertEqual(len(result.failures), 0) - self.assertEqual(result.testsRun, 2) - - - def test_overriding_call(self): - class MySuite(unittest.TestSuite): - called = False - def __call__(self, *args, **kw): - self.called = True - unittest.TestSuite.__call__(self, *args, **kw) - - suite = MySuite() - result = unittest.TestResult() - wrapper = unittest.TestSuite() - wrapper.addTest(suite) - wrapper(result) - self.assertTrue(suite.called) - - # reusing results should be permitted even if abominable - self.assertFalse(result._testRunEntered) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/Lib/unittest/util.py b/python/Lib/unittest/util.py deleted file mode 100644 index 220a024e90..0000000000 --- a/python/Lib/unittest/util.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Various utility functions.""" -from collections import namedtuple, OrderedDict - - -__unittest = True - -_MAX_LENGTH = 80 -def safe_repr(obj, short=False): - try: - result = repr(obj) - except Exception: - result = object.__repr__(obj) - if not short or len(result) < _MAX_LENGTH: - return result - return result[:_MAX_LENGTH] + ' [truncated]...' - - -def strclass(cls): - return "%s.%s" % (cls.__module__, cls.__name__) - -def sorted_list_difference(expected, actual): - """Finds elements in only one or the other of two, sorted input lists. 
- - Returns a two-element tuple of lists. The first list contains those - elements in the "expected" list but not in the "actual" list, and the - second contains those elements in the "actual" list but not in the - "expected" list. Duplicate elements in either input list are ignored. - """ - i = j = 0 - missing = [] - unexpected = [] - while True: - try: - e = expected[i] - a = actual[j] - if e < a: - missing.append(e) - i += 1 - while expected[i] == e: - i += 1 - elif e > a: - unexpected.append(a) - j += 1 - while actual[j] == a: - j += 1 - else: - i += 1 - try: - while expected[i] == e: - i += 1 - finally: - j += 1 - while actual[j] == a: - j += 1 - except IndexError: - missing.extend(expected[i:]) - unexpected.extend(actual[j:]) - break - return missing, unexpected - - -def unorderable_list_difference(expected, actual, ignore_duplicate=False): - """Same behavior as sorted_list_difference but - for lists of unorderable items (like dicts). - - As it does a linear search per item (remove) it - has O(n*n) performance. 
- """ - missing = [] - unexpected = [] - while expected: - item = expected.pop() - try: - actual.remove(item) - except ValueError: - missing.append(item) - if ignore_duplicate: - for lst in expected, actual: - try: - while True: - lst.remove(item) - except ValueError: - pass - if ignore_duplicate: - while actual: - item = actual.pop() - unexpected.append(item) - try: - while True: - actual.remove(item) - except ValueError: - pass - return missing, unexpected - - # anything left in actual is unexpected - return missing, actual - -_Mismatch = namedtuple('Mismatch', 'actual expected value') - -def _count_diff_all_purpose(actual, expected): - 'Returns list of (cnt_act, cnt_exp, elem) triples where the counts differ' - # elements need not be hashable - s, t = list(actual), list(expected) - m, n = len(s), len(t) - NULL = object() - result = [] - for i, elem in enumerate(s): - if elem is NULL: - continue - cnt_s = cnt_t = 0 - for j in range(i, m): - if s[j] == elem: - cnt_s += 1 - s[j] = NULL - for j, other_elem in enumerate(t): - if other_elem == elem: - cnt_t += 1 - t[j] = NULL - if cnt_s != cnt_t: - diff = _Mismatch(cnt_s, cnt_t, elem) - result.append(diff) - - for i, elem in enumerate(t): - if elem is NULL: - continue - cnt_t = 0 - for j in range(i, n): - if t[j] == elem: - cnt_t += 1 - t[j] = NULL - diff = _Mismatch(0, cnt_t, elem) - result.append(diff) - return result - -def _ordered_count(iterable): - 'Return dict of element counts, in the order they were first seen' - c = OrderedDict() - for elem in iterable: - c[elem] = c.get(elem, 0) + 1 - return c - -def _count_diff_hashable(actual, expected): - 'Returns list of (cnt_act, cnt_exp, elem) triples where the counts differ' - # elements must be hashable - s, t = _ordered_count(actual), _ordered_count(expected) - result = [] - for elem, cnt_s in s.items(): - cnt_t = t.get(elem, 0) - if cnt_s != cnt_t: - diff = _Mismatch(cnt_s, cnt_t, elem) - result.append(diff) - for elem, cnt_t in t.items(): - if elem not in s: - 
diff = _Mismatch(0, cnt_t, elem) - result.append(diff) - return result diff --git a/python/Lib/urllib.py b/python/Lib/urllib.py deleted file mode 100755 index 4e43e0a83f..0000000000 --- a/python/Lib/urllib.py +++ /dev/null @@ -1,1669 +0,0 @@ -"""Open an arbitrary URL. - -See the following document for more info on URLs: -"Names and Addresses, URIs, URLs, URNs, URCs", at -http://www.w3.org/pub/WWW/Addressing/Overview.html - -See also the HTTP spec (from which the error codes are derived): -"HTTP - Hypertext Transfer Protocol", at -http://www.w3.org/pub/WWW/Protocols/ - -Related standards and specs: -- RFC1808: the "relative URL" spec. (authoritative status) -- RFC1738 - the "URL standard". (authoritative status) -- RFC1630 - the "URI spec". (informational status) - -The object returned by URLopener().open(file) will differ per -protocol. All you know is that is has methods read(), readline(), -readlines(), fileno(), close() and info(). The read*(), fileno() -and close() methods work like those of open files. -The info() method returns a mimetools.Message object which can be -used to query various info about the object, if available. -(mimetools.Message objects are queried with the getheader() method.) 
-""" - -import string -import socket -import os -import time -import sys -import base64 -import re - -from urlparse import urljoin as basejoin - -__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve", - "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus", - "urlencode", "url2pathname", "pathname2url", "splittag", - "localhost", "thishost", "ftperrors", "basejoin", "unwrap", - "splittype", "splithost", "splituser", "splitpasswd", "splitport", - "splitnport", "splitquery", "splitattr", "splitvalue", - "getproxies"] - -__version__ = '1.17' # XXX This version is not always updated :-( - -MAXFTPCACHE = 10 # Trim the ftp cache beyond this size - -# Helper for non-unix systems -if os.name == 'nt': - from nturl2path import url2pathname, pathname2url -elif os.name == 'riscos': - from rourl2path import url2pathname, pathname2url -else: - def url2pathname(pathname): - """OS-specific conversion from a relative URL of the 'file' scheme - to a file system path; not recommended for general use.""" - return unquote(pathname) - - def pathname2url(pathname): - """OS-specific conversion from a file system path to a relative URL - of the 'file' scheme; not recommended for general use.""" - return quote(pathname) - -# This really consists of two pieces: -# (1) a class which handles opening of all sorts of URLs -# (plus assorted utilities etc.) -# (2) a set of functions for parsing URLs -# XXX Should these be separated out into different modules? 
- - -# Shortcut for basic usage -_urlopener = None -def urlopen(url, data=None, proxies=None, context=None): - """Create a file-like object for the specified URL to read from.""" - from warnings import warnpy3k - warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " - "favor of urllib2.urlopen()", stacklevel=2) - - global _urlopener - if proxies is not None or context is not None: - opener = FancyURLopener(proxies=proxies, context=context) - elif not _urlopener: - opener = FancyURLopener() - _urlopener = opener - else: - opener = _urlopener - if data is None: - return opener.open(url) - else: - return opener.open(url, data) -def urlretrieve(url, filename=None, reporthook=None, data=None, context=None): - global _urlopener - if context is not None: - opener = FancyURLopener(context=context) - elif not _urlopener: - _urlopener = opener = FancyURLopener() - else: - opener = _urlopener - return opener.retrieve(url, filename, reporthook, data) -def urlcleanup(): - if _urlopener: - _urlopener.cleanup() - _safe_quoters.clear() - ftpcache.clear() - -# check for SSL -try: - import ssl -except: - _have_ssl = False -else: - _have_ssl = True - -# exception raised when downloaded size does not match content-length -class ContentTooShortError(IOError): - def __init__(self, message, content): - IOError.__init__(self, message) - self.content = content - -ftpcache = {} -class URLopener: - """Class to open URLs. - This is a class rather than just a subroutine because we may need - more than one set of global protocol-specific options. 
- Note -- this is a base class for those who don't want the - automatic handling of errors type 302 (relocated) and 401 - (authorization needed).""" - - __tempfiles = None - - version = "Python-urllib/%s" % __version__ - - # Constructor - def __init__(self, proxies=None, context=None, **x509): - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'has_key'), "proxies must be a mapping" - self.proxies = proxies - self.key_file = x509.get('key_file') - self.cert_file = x509.get('cert_file') - self.context = context - self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')] - self.__tempfiles = [] - self.__unlink = os.unlink # See cleanup() - self.tempcache = None - # Undocumented feature: if you assign {} to tempcache, - # it is used to cache files retrieved with - # self.retrieve(). This is not enabled by default - # since it does not work for changing documents (and I - # haven't got the logic to check expiration headers - # yet). - self.ftpcache = ftpcache - # Undocumented feature: you can use a different - # ftp cache by assigning to the .ftpcache member; - # in case you want logically independent URL openers - # XXX This is not threadsafe. Bah. - - def __del__(self): - self.close() - - def close(self): - self.cleanup() - - def cleanup(self): - # This code sometimes runs when the rest of this module - # has already been deleted, so it can't use any globals - # or import anything. - if self.__tempfiles: - for file in self.__tempfiles: - try: - self.__unlink(file) - except OSError: - pass - del self.__tempfiles[:] - if self.tempcache: - self.tempcache.clear() - - def addheader(self, *args): - """Add a header to be used by the HTTP interface only - e.g. 
u.addheader('Accept', 'sound/basic')""" - self.addheaders.append(args) - - # External interface - def open(self, fullurl, data=None): - """Use URLopener().open(file) instead of open(file, 'r').""" - fullurl = unwrap(toBytes(fullurl)) - # percent encode url, fixing lame server errors for e.g, like space - # within url paths. - fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") - if self.tempcache and fullurl in self.tempcache: - filename, headers = self.tempcache[fullurl] - fp = open(filename, 'rb') - return addinfourl(fp, headers, fullurl) - urltype, url = splittype(fullurl) - if not urltype: - urltype = 'file' - if urltype in self.proxies: - proxy = self.proxies[urltype] - urltype, proxyhost = splittype(proxy) - host, selector = splithost(proxyhost) - url = (host, fullurl) # Signal special case to open_*() - else: - proxy = None - name = 'open_' + urltype - self.type = urltype - name = name.replace('-', '_') - if not hasattr(self, name): - if proxy: - return self.open_unknown_proxy(proxy, fullurl, data) - else: - return self.open_unknown(fullurl, data) - try: - if data is None: - return getattr(self, name)(url) - else: - return getattr(self, name)(url, data) - except socket.error, msg: - raise IOError, ('socket error', msg), sys.exc_info()[2] - - def open_unknown(self, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = splittype(fullurl) - raise IOError, ('url error', 'unknown url type', type) - - def open_unknown_proxy(self, proxy, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = splittype(fullurl) - raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) - - # External interface - def retrieve(self, url, filename=None, reporthook=None, data=None): - """retrieve(url) returns (filename, headers) for a local object - or (tempfilename, headers) for a remote object.""" - url = unwrap(toBytes(url)) - if self.tempcache and url in self.tempcache: - return self.tempcache[url] 
- type, url1 = splittype(url) - if filename is None and (not type or type == 'file'): - try: - fp = self.open_local_file(url1) - hdrs = fp.info() - fp.close() - return url2pathname(splithost(url1)[1]), hdrs - except IOError: - pass - fp = self.open(url, data) - try: - headers = fp.info() - if filename: - tfp = open(filename, 'wb') - else: - import tempfile - garbage, path = splittype(url) - garbage, path = splithost(path or "") - path, garbage = splitquery(path or "") - path, garbage = splitattr(path or "") - suffix = os.path.splitext(path)[1] - (fd, filename) = tempfile.mkstemp(suffix) - self.__tempfiles.append(filename) - tfp = os.fdopen(fd, 'wb') - try: - result = filename, headers - if self.tempcache is not None: - self.tempcache[url] = result - bs = 1024*8 - size = -1 - read = 0 - blocknum = 0 - if "content-length" in headers: - size = int(headers["Content-Length"]) - if reporthook: - reporthook(blocknum, bs, size) - while 1: - block = fp.read(bs) - if block == "": - break - read += len(block) - tfp.write(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, bs, size) - finally: - tfp.close() - finally: - fp.close() - - # raise exception if actual size does not match content-length header - if size >= 0 and read < size: - raise ContentTooShortError("retrieval incomplete: got only %i out " - "of %i bytes" % (read, size), result) - - return result - - # Each method named open_ knows how to open that type of URL - - def open_http(self, url, data=None): - """Use HTTP protocol.""" - import httplib - user_passwd = None - proxy_passwd= None - if isinstance(url, str): - host, selector = splithost(url) - if host: - user_passwd, host = splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # check whether the proxy contains authorization information - proxy_passwd, host = splituser(host) - # now we proceed with the url we want to obtain - urltype, rest = splittype(selector) - url = rest - user_passwd = None - if 
urltype.lower() != 'http': - realhost = None - else: - realhost, rest = splithost(rest) - if realhost: - user_passwd, realhost = splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - if proxy_bypass(realhost): - host = realhost - - #print "proxy via http:", host, selector - if not host: raise IOError, ('http error', 'no host given') - - if proxy_passwd: - proxy_passwd = unquote(proxy_passwd) - proxy_auth = base64.b64encode(proxy_passwd).strip() - else: - proxy_auth = None - - if user_passwd: - user_passwd = unquote(user_passwd) - auth = base64.b64encode(user_passwd).strip() - else: - auth = None - h = httplib.HTTP(host) - if data is not None: - h.putrequest('POST', selector) - h.putheader('Content-Type', 'application/x-www-form-urlencoded') - h.putheader('Content-Length', '%d' % len(data)) - else: - h.putrequest('GET', selector) - if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) - if auth: h.putheader('Authorization', 'Basic %s' % auth) - if realhost: h.putheader('Host', realhost) - for args in self.addheaders: h.putheader(*args) - h.endheaders(data) - errcode, errmsg, headers = h.getreply() - fp = h.getfile() - if errcode == -1: - if fp: fp.close() - # something went wrong with the HTTP status line - raise IOError, ('http protocol error', 0, - 'got a bad status line', None) - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if (200 <= errcode < 300): - return addinfourl(fp, headers, "http:" + url, errcode) - else: - if data is None: - return self.http_error(url, fp, errcode, errmsg, headers) - else: - return self.http_error(url, fp, errcode, errmsg, headers, data) - - def http_error(self, url, fp, errcode, errmsg, headers, data=None): - """Handle http errors. 
- Derived class can override this, or provide specific handlers - named http_error_DDD where DDD is the 3-digit error code.""" - # First check if there's a specific handler for this error - name = 'http_error_%d' % errcode - if hasattr(self, name): - method = getattr(self, name) - if data is None: - result = method(url, fp, errcode, errmsg, headers) - else: - result = method(url, fp, errcode, errmsg, headers, data) - if result: return result - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise IOError.""" - fp.close() - raise IOError, ('http error', errcode, errmsg, headers) - - if _have_ssl: - def open_https(self, url, data=None): - """Use HTTPS protocol.""" - - import httplib - user_passwd = None - proxy_passwd = None - if isinstance(url, str): - host, selector = splithost(url) - if host: - user_passwd, host = splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # here, we determine, whether the proxy contains authorization information - proxy_passwd, host = splituser(host) - urltype, rest = splittype(selector) - url = rest - user_passwd = None - if urltype.lower() != 'https': - realhost = None - else: - realhost, rest = splithost(rest) - if realhost: - user_passwd, realhost = splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - #print "proxy via https:", host, selector - if not host: raise IOError, ('https error', 'no host given') - if proxy_passwd: - proxy_passwd = unquote(proxy_passwd) - proxy_auth = base64.b64encode(proxy_passwd).strip() - else: - proxy_auth = None - if user_passwd: - user_passwd = unquote(user_passwd) - auth = base64.b64encode(user_passwd).strip() - else: - auth = None - h = httplib.HTTPS(host, 0, - key_file=self.key_file, - cert_file=self.cert_file, - context=self.context) - if data is not None: - h.putrequest('POST', 
selector) - h.putheader('Content-Type', - 'application/x-www-form-urlencoded') - h.putheader('Content-Length', '%d' % len(data)) - else: - h.putrequest('GET', selector) - if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) - if auth: h.putheader('Authorization', 'Basic %s' % auth) - if realhost: h.putheader('Host', realhost) - for args in self.addheaders: h.putheader(*args) - h.endheaders(data) - errcode, errmsg, headers = h.getreply() - fp = h.getfile() - if errcode == -1: - if fp: fp.close() - # something went wrong with the HTTP status line - raise IOError, ('http protocol error', 0, - 'got a bad status line', None) - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if (200 <= errcode < 300): - return addinfourl(fp, headers, "https:" + url, errcode) - else: - if data is None: - return self.http_error(url, fp, errcode, errmsg, headers) - else: - return self.http_error(url, fp, errcode, errmsg, headers, - data) - - def open_file(self, url): - """Use local file or FTP depending on form of URL.""" - if not isinstance(url, str): - raise IOError, ('file error', 'proxy support for file protocol currently not implemented') - if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': - return self.open_ftp(url) - else: - return self.open_local_file(url) - - def open_local_file(self, url): - """Use local file.""" - import mimetypes, mimetools, email.utils - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - host, file = splithost(url) - localname = url2pathname(file) - try: - stats = os.stat(localname) - except OSError, e: - raise IOError(e.errno, e.strerror, e.filename) - size = stats.st_size - modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(url)[0] - headers = mimetools.Message(StringIO( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype 
or 'text/plain', size, modified))) - if not host: - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - elif file[:2] == './': - raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) - return addinfourl(open(localname, 'rb'), - headers, urlfile) - host, port = splitport(host) - if not port \ - and socket.gethostbyname(host) in (localhost(), thishost()): - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - return addinfourl(open(localname, 'rb'), - headers, urlfile) - raise IOError, ('local file error', 'not on local host') - - def open_ftp(self, url): - """Use FTP protocol.""" - if not isinstance(url, str): - raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') - import mimetypes, mimetools - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - host, path = splithost(url) - if not host: raise IOError, ('ftp error', 'no host given') - host, port = splitport(host) - user, host = splituser(host) - if user: user, passwd = splitpasswd(user) - else: passwd = None - host = unquote(host) - user = user or '' - passwd = passwd or '' - host = socket.gethostbyname(host) - if not port: - import ftplib - port = ftplib.FTP_PORT - else: - port = int(port) - path, attrs = splitattr(path) - path = unquote(path) - dirs = path.split('/') - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: dirs = dirs[1:] - if dirs and not dirs[0]: dirs[0] = '/' - key = user, host, port, '/'.join(dirs) - # XXX thread unsafe! 
- if len(self.ftpcache) > MAXFTPCACHE: - # Prune the cache, rather arbitrarily - for k in self.ftpcache.keys(): - if k != key: - v = self.ftpcache[k] - del self.ftpcache[k] - v.close() - try: - if not key in self.ftpcache: - self.ftpcache[key] = \ - ftpwrapper(user, passwd, host, port, dirs) - if not file: type = 'D' - else: type = 'I' - for attr in attrs: - attr, value = splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - (fp, retrlen) = self.ftpcache[key].retrfile(file, type) - mtype = mimetypes.guess_type("ftp:" + url)[0] - headers = "" - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - headers = mimetools.Message(StringIO(headers)) - return addinfourl(fp, headers, "ftp:" + url) - except ftperrors(), msg: - raise IOError, ('ftp error', msg), sys.exc_info()[2] - - def open_data(self, url, data=None): - """Use "data" URL.""" - if not isinstance(url, str): - raise IOError, ('data error', 'proxy support for data protocol currently not implemented') - # ignore POSTed data - # - # syntax of data URLs: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - import mimetools - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - try: - [type, data] = url.split(',', 1) - except ValueError: - raise IOError, ('data error', 'bad data URL') - if not type: - type = 'text/plain;charset=US-ASCII' - semi = type.rfind(';') - if semi >= 0 and '=' not in type[semi:]: - encoding = type[semi+1:] - type = type[:semi] - else: - encoding = '' - msg = [] - msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', - time.gmtime(time.time()))) - msg.append('Content-type: %s' % type) - if encoding == 'base64': - data = base64.decodestring(data) - else: - data = 
unquote(data) - msg.append('Content-Length: %d' % len(data)) - msg.append('') - msg.append(data) - msg = '\n'.join(msg) - f = StringIO(msg) - headers = mimetools.Message(f, 0) - #f.fileno = None # needed for addinfourl - return addinfourl(f, headers, url) - - -class FancyURLopener(URLopener): - """Derived class with handlers for errors we can handle (perhaps).""" - - def __init__(self, *args, **kwargs): - URLopener.__init__(self, *args, **kwargs) - self.auth_cache = {} - self.tries = 0 - self.maxtries = 10 - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handling -- don't raise an exception.""" - return addinfourl(fp, headers, "http:" + url, errcode) - - def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): - """Error 302 -- relocated (temporarily).""" - self.tries += 1 - try: - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", - headers) - result = self.redirect_internal(url, fp, errcode, errmsg, - headers, data) - return result - finally: - self.tries = 0 - - def redirect_internal(self, url, fp, errcode, errmsg, headers, data): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - return - fp.close() - # In case the server sent a relative URL, join with original: - newurl = basejoin(self.type + ":" + url, newurl) - - # For security reasons we do not allow redirects to protocols - # other than HTTP, HTTPS or FTP. 
- newurl_lower = newurl.lower() - if not (newurl_lower.startswith('http://') or - newurl_lower.startswith('https://') or - newurl_lower.startswith('ftp://')): - raise IOError('redirect error', errcode, - errmsg + " - Redirection to url '%s' is not allowed" % - newurl, - headers) - - return self.open(newurl) - - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - """Error 301 -- also relocated (permanently).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): - """Error 303 -- also relocated (essentially identical to 302).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): - """Error 307 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - """Error 401 -- authentication required. - This function supports Basic authentication only.""" - if not 'www-authenticate' in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['www-authenticate'] - import re - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - name = 'retry_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): - """Error 407 -- proxy authentication required. 
- This function supports Basic authentication only.""" - if not 'proxy-authenticate' in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['proxy-authenticate'] - import re - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - name = 'retry_proxy_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def retry_proxy_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'http://' + host + selector - proxy = self.proxies['http'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost - self.proxies['http'] = 'http://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_proxy_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'https://' + host + selector - proxy = self.proxies['https'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost - self.proxies['https'] = 'https://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def 
retry_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host - newurl = 'http://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host - newurl = 'https://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def get_user_passwd(self, host, realm, clear_cache=0): - key = realm + '@' + host.lower() - if key in self.auth_cache: - if clear_cache: - del self.auth_cache[key] - else: - return self.auth_cache[key] - user, passwd = self.prompt_user_passwd(host, realm) - if user or passwd: self.auth_cache[key] = (user, passwd) - return user, passwd - - def prompt_user_passwd(self, host, realm): - """Override this in a GUI environment!""" - import getpass - try: - user = raw_input("Enter username for %s at %s: " % (realm, - host)) - passwd = getpass.getpass("Enter password for %s in %s at %s: " % - (user, realm, host)) - return user, passwd - except KeyboardInterrupt: - print - return None, None - - -# Utility functions - -_localhost = None -def localhost(): - """Return the IP address of the magic hostname 'localhost'.""" - global _localhost - if _localhost is None: - _localhost = socket.gethostbyname('localhost') - return _localhost - -_thishost = None -def thishost(): - """Return the IP address of the current host.""" - global _thishost - if _thishost is None: - try: - _thishost = socket.gethostbyname(socket.gethostname()) - except 
socket.gaierror: - _thishost = socket.gethostbyname('localhost') - return _thishost - -_ftperrors = None -def ftperrors(): - """Return the set of errors raised by the FTP class.""" - global _ftperrors - if _ftperrors is None: - import ftplib - _ftperrors = ftplib.all_errors - return _ftperrors - -_noheaders = None -def noheaders(): - """Return an empty mimetools.Message object.""" - global _noheaders - if _noheaders is None: - import mimetools - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - _noheaders = mimetools.Message(StringIO(), 0) - _noheaders.fp.close() # Recycle file descriptor - return _noheaders - - -# Utility classes - -class ftpwrapper: - """Class used by open_ftp() for cache of open FTP connections.""" - - def __init__(self, user, passwd, host, port, dirs, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - persistent=True): - self.user = user - self.passwd = passwd - self.host = host - self.port = port - self.dirs = dirs - self.timeout = timeout - self.refcount = 0 - self.keepalive = persistent - try: - self.init() - except: - self.close() - raise - - def init(self): - import ftplib - self.busy = 0 - self.ftp = ftplib.FTP() - self.ftp.connect(self.host, self.port, self.timeout) - self.ftp.login(self.user, self.passwd) - _target = '/'.join(self.dirs) - self.ftp.cwd(_target) - - def retrfile(self, file, type): - import ftplib - self.endtransfer() - if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 - else: cmd = 'TYPE ' + type; isdir = 0 - try: - self.ftp.voidcmd(cmd) - except ftplib.all_errors: - self.init() - self.ftp.voidcmd(cmd) - conn = None - if file and not isdir: - # Try to retrieve as a file - try: - cmd = 'RETR ' + file - conn, retrlen = self.ftp.ntransfercmd(cmd) - except ftplib.error_perm, reason: - if str(reason)[:3] != '550': - raise IOError, ('ftp error', reason), sys.exc_info()[2] - if not conn: - # Set transfer mode to ASCII! - self.ftp.voidcmd('TYPE A') - # Try a directory listing. 
Verify that directory exists. - if file: - pwd = self.ftp.pwd() - try: - try: - self.ftp.cwd(file) - except ftplib.error_perm, reason: - raise IOError, ('ftp error', reason), sys.exc_info()[2] - finally: - self.ftp.cwd(pwd) - cmd = 'LIST ' + file - else: - cmd = 'LIST' - conn, retrlen = self.ftp.ntransfercmd(cmd) - self.busy = 1 - ftpobj = addclosehook(conn.makefile('rb'), self.file_close) - self.refcount += 1 - conn.close() - # Pass back both a suitably decorated object and a retrieval length - return (ftpobj, retrlen) - - def endtransfer(self): - self.busy = 0 - - def close(self): - self.keepalive = False - if self.refcount <= 0: - self.real_close() - - def file_close(self): - self.endtransfer() - self.refcount -= 1 - if self.refcount <= 0 and not self.keepalive: - self.real_close() - - def real_close(self): - self.endtransfer() - try: - self.ftp.close() - except ftperrors(): - pass - -class addbase: - """Base class for addinfo and addclosehook.""" - - def __init__(self, fp): - self.fp = fp - self.read = self.fp.read - self.readline = self.fp.readline - if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines - if hasattr(self.fp, "fileno"): - self.fileno = self.fp.fileno - else: - self.fileno = lambda: None - if hasattr(self.fp, "__iter__"): - self.__iter__ = self.fp.__iter__ - if hasattr(self.fp, "next"): - self.next = self.fp.next - - def __repr__(self): - return '<%s at %r whose fp = %r>' % (self.__class__.__name__, - id(self), self.fp) - - def close(self): - self.read = None - self.readline = None - self.readlines = None - self.fileno = None - if self.fp: self.fp.close() - self.fp = None - -class addclosehook(addbase): - """Class to add a close hook to an open file.""" - - def __init__(self, fp, closehook, *hookargs): - addbase.__init__(self, fp) - self.closehook = closehook - self.hookargs = hookargs - - def close(self): - try: - closehook = self.closehook - hookargs = self.hookargs - if closehook: - self.closehook = None - self.hookargs = None 
- closehook(*hookargs) - finally: - addbase.close(self) - - -class addinfo(addbase): - """class to add an info() method to an open file.""" - - def __init__(self, fp, headers): - addbase.__init__(self, fp) - self.headers = headers - - def info(self): - return self.headers - -class addinfourl(addbase): - """class to add info() and geturl() methods to an open file.""" - - def __init__(self, fp, headers, url, code=None): - addbase.__init__(self, fp) - self.headers = headers - self.url = url - self.code = code - - def info(self): - return self.headers - - def getcode(self): - return self.code - - def geturl(self): - return self.url - - -# Utilities to parse URLs (most of these return None for missing parts): -# unwrap('') --> 'type://host/path' -# splittype('type:opaquestring') --> 'type', 'opaquestring' -# splithost('//host[:port]/path') --> 'host[:port]', '/path' -# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' -# splitpasswd('user:passwd') -> 'user', 'passwd' -# splitport('host:port') --> 'host', 'port' -# splitquery('/path?query') --> '/path', 'query' -# splittag('/path#tag') --> '/path', 'tag' -# splitattr('/path;attr1=value1;attr2=value2;...') -> -# '/path', ['attr1=value1', 'attr2=value2', ...] -# splitvalue('attr=value') --> 'attr', 'value' -# unquote('abc%20def') -> 'abc def' -# quote('abc def') -> 'abc%20def') - -try: - unicode -except NameError: - def _is_unicode(x): - return 0 -else: - def _is_unicode(x): - return isinstance(x, unicode) - -def toBytes(url): - """toBytes(u"URL") --> 'URL'.""" - # Most URL schemes require ASCII. 
If that changes, the conversion - # can be relaxed - if _is_unicode(url): - try: - url = url.encode("ASCII") - except UnicodeError: - raise UnicodeError("URL " + repr(url) + - " contains non-ASCII characters") - return url - -def unwrap(url): - """unwrap('') --> 'type://host/path'.""" - url = url.strip() - if url[:1] == '<' and url[-1:] == '>': - url = url[1:-1].strip() - if url[:4] == 'URL:': url = url[4:].strip() - return url - -_typeprog = None -def splittype(url): - """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" - global _typeprog - if _typeprog is None: - import re - _typeprog = re.compile('^([^/:]+):') - - match = _typeprog.match(url) - if match: - scheme = match.group(1) - return scheme.lower(), url[len(scheme) + 1:] - return None, url - -_hostprog = None -def splithost(url): - """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" - global _hostprog - if _hostprog is None: - import re - _hostprog = re.compile('^//([^/?]*)(.*)$') - - match = _hostprog.match(url) - if match: - host_port = match.group(1) - path = match.group(2) - if path and not path.startswith('/'): - path = '/' + path - return host_port, path - return None, url - -_userprog = None -def splituser(host): - """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" - global _userprog - if _userprog is None: - import re - _userprog = re.compile('^(.*)@(.*)$') - - match = _userprog.match(host) - if match: return match.group(1, 2) - return None, host - -_passwdprog = None -def splitpasswd(user): - """splitpasswd('user:passwd') -> 'user', 'passwd'.""" - global _passwdprog - if _passwdprog is None: - import re - _passwdprog = re.compile('^([^:]*):(.*)$',re.S) - - match = _passwdprog.match(user) - if match: return match.group(1, 2) - return user, None - -# splittag('/path#tag') --> '/path', 'tag' -_portprog = None -def splitport(host): - """splitport('host:port') --> 'host', 'port'.""" - global _portprog - if _portprog is None: - import re - 
_portprog = re.compile('^(.*):([0-9]*)$') - - match = _portprog.match(host) - if match: - host, port = match.groups() - if port: - return host, port - return host, None - -_nportprog = None -def splitnport(host, defport=-1): - """Split host and port, returning numeric port. - Return given default port if no ':' found; defaults to -1. - Return numerical port if a valid number are found after ':'. - Return None if ':' but not a valid number.""" - global _nportprog - if _nportprog is None: - import re - _nportprog = re.compile('^(.*):(.*)$') - - match = _nportprog.match(host) - if match: - host, port = match.group(1, 2) - if port: - try: - nport = int(port) - except ValueError: - nport = None - return host, nport - return host, defport - -_queryprog = None -def splitquery(url): - """splitquery('/path?query') --> '/path', 'query'.""" - global _queryprog - if _queryprog is None: - import re - _queryprog = re.compile('^(.*)\?([^?]*)$') - - match = _queryprog.match(url) - if match: return match.group(1, 2) - return url, None - -_tagprog = None -def splittag(url): - """splittag('/path#tag') --> '/path', 'tag'.""" - global _tagprog - if _tagprog is None: - import re - _tagprog = re.compile('^(.*)#([^#]*)$') - - match = _tagprog.match(url) - if match: return match.group(1, 2) - return url, None - -def splitattr(url): - """splitattr('/path;attr1=value1;attr2=value2;...') -> - '/path', ['attr1=value1', 'attr2=value2', ...].""" - words = url.split(';') - return words[0], words[1:] - -_valueprog = None -def splitvalue(attr): - """splitvalue('attr=value') --> 'attr', 'value'.""" - global _valueprog - if _valueprog is None: - import re - _valueprog = re.compile('^([^=]*)=(.*)$') - - match = _valueprog.match(attr) - if match: return match.group(1, 2) - return attr, None - -# urlparse contains a duplicate of this method to avoid a circular import. If -# you update this method, also update the copy in urlparse. This code -# duplication does not exist in Python3. 
- -_hexdig = '0123456789ABCDEFabcdef' -_hextochr = dict((a + b, chr(int(a + b, 16))) - for a in _hexdig for b in _hexdig) -_asciire = re.compile('([\x00-\x7f]+)') - -def unquote(s): - """unquote('abc%20def') -> 'abc def'.""" - if _is_unicode(s): - if '%' not in s: - return s - bits = _asciire.split(s) - res = [bits[0]] - append = res.append - for i in range(1, len(bits), 2): - append(unquote(str(bits[i])).decode('latin1')) - append(bits[i + 1]) - return ''.join(res) - - bits = s.split('%') - # fastpath - if len(bits) == 1: - return s - res = [bits[0]] - append = res.append - for item in bits[1:]: - try: - append(_hextochr[item[:2]]) - append(item[2:]) - except KeyError: - append('%') - append(item) - return ''.join(res) - -def unquote_plus(s): - """unquote('%7e/abc+def') -> '~/abc def'""" - s = s.replace('+', ' ') - return unquote(s) - -always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' - 'abcdefghijklmnopqrstuvwxyz' - '0123456789' '_.-') -_safe_map = {} -for i, c in zip(xrange(256), str(bytearray(xrange(256)))): - _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i) -_safe_quoters = {} - -def quote(s, safe='/'): - """quote('abc def') -> 'abc%20def' - - Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. - - RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists - the following reserved characters. - - reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | - "$" | "," - - Each of these characters is reserved in some component of a URL, - but not necessarily in all of them. - - By default, the quote function is intended for quoting the path - section of a URL. Thus, it will not encode '/'. This character - is reserved, but in typical usage the quote function is being - called on a path where the existing slash characters are used as - reserved characters. 
- """ - # fastpath - if not s: - if s is None: - raise TypeError('None object cannot be quoted') - return s - cachekey = (safe, always_safe) - try: - (quoter, safe) = _safe_quoters[cachekey] - except KeyError: - safe_map = _safe_map.copy() - safe_map.update([(c, c) for c in safe]) - quoter = safe_map.__getitem__ - safe = always_safe + safe - _safe_quoters[cachekey] = (quoter, safe) - if not s.rstrip(safe): - return s - return ''.join(map(quoter, s)) - -def quote_plus(s, safe=''): - """Quote the query fragment of a URL; replacing ' ' with '+'""" - if ' ' in s: - s = quote(s, safe + ' ') - return s.replace(' ', '+') - return quote(s, safe) - -def urlencode(query, doseq=0): - """Encode a sequence of two-element tuples or dictionary into a URL query string. - - If any values in the query arg are sequences and doseq is true, each - sequence element is converted to a separate parameter. - - If the query arg is a sequence of two-element tuples, the order of the - parameters in the output will match the order of parameters in the - input. - """ - - if hasattr(query,"items"): - # mapping objects - query = query.items() - else: - # it's a bother at times that strings and string-like objects are - # sequences... 
- try: - # non-sequence items should not work with len() - # non-empty strings will fail this - if len(query) and not isinstance(query[0], tuple): - raise TypeError - # zero-length sequences of all types will get here and succeed, - # but that's a minor nit - since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty,va,tb = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", tb - - l = [] - if not doseq: - # preserve old behavior - for k, v in query: - k = quote_plus(str(k)) - v = quote_plus(str(v)) - l.append(k + '=' + v) - else: - for k, v in query: - k = quote_plus(str(k)) - if isinstance(v, str): - v = quote_plus(v) - l.append(k + '=' + v) - elif _is_unicode(v): - # is there a reasonable way to convert to ASCII? - # encode generates a string, but "replace" or "ignore" - # lose information and "strict" can raise UnicodeError - v = quote_plus(v.encode("ASCII","replace")) - l.append(k + '=' + v) - else: - try: - # is this a sufficient test for sequence-ness? - len(v) - except TypeError: - # not a sequence - v = quote_plus(str(v)) - l.append(k + '=' + v) - else: - # loop over the sequence - for elt in v: - l.append(k + '=' + quote_plus(str(elt))) - return '&'.join(l) - -# Proxy handling -def getproxies_environment(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Scan the environment for variables named _proxy; - this seems to be the standard convention. In order to prefer lowercase - variables, we process the environment in two passes, first matches any - and second matches only lower case proxies. - - If you need a different way, you can pass a proxies dictionary to the - [Fancy]URLopener constructor. 
- """ - # Get all variables - proxies = {} - for name, value in os.environ.items(): - name = name.lower() - if value and name[-6:] == '_proxy': - proxies[name[:-6]] = value - - # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY - # (non-all-lowercase) as it may be set from the web server by a "Proxy:" - # header from the client - # If "proxy" is lowercase, it will still be used thanks to the next block - if 'REQUEST_METHOD' in os.environ: - proxies.pop('http', None) - - # Get lowercase variables - for name, value in os.environ.items(): - if name[-6:] == '_proxy': - name = name.lower() - if value: - proxies[name[:-6]] = value - else: - proxies.pop(name[:-6], None) - - return proxies - -def proxy_bypass_environment(host, proxies=None): - """Test if proxies should not be used for a particular host. - - Checks the proxies dict for the value of no_proxy, which should be a - list of comma separated DNS suffixes, or '*' for all hosts. - """ - if proxies is None: - proxies = getproxies_environment() - # don't bypass, if no_proxy isn't specified - try: - no_proxy = proxies['no'] - except KeyError: - return 0 - # '*' is special case for always bypass - if no_proxy == '*': - return 1 - # strip port off host - hostonly, port = splitport(host) - # check if the host ends with any of the DNS suffixes - no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] - for name in no_proxy_list: - if name: - name = re.escape(name) - pattern = r'(.+\.)?%s$' % name - if (re.match(pattern, hostonly, re.I) - or re.match(pattern, host, re.I)): - return 1 - # otherwise, don't bypass - return 0 - - -if sys.platform == 'darwin': - from _scproxy import _get_proxy_settings, _get_proxies - - def proxy_bypass_macosx_sysconf(host): - """ - Return True iff this host shouldn't be accessed using a proxy - - This function uses the MacOSX framework SystemConfiguration - to fetch the proxy information. 
- """ - import re - import socket - from fnmatch import fnmatch - - hostonly, port = splitport(host) - - def ip2num(ipAddr): - parts = ipAddr.split('.') - parts = map(int, parts) - if len(parts) != 4: - parts = (parts + [0, 0, 0, 0])[:4] - return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] - - proxy_settings = _get_proxy_settings() - - # Check for simple host names: - if '.' not in host: - if proxy_settings['exclude_simple']: - return True - - hostIP = None - - for value in proxy_settings.get('exceptions', ()): - # Items in the list are strings like these: *.local, 169.254/16 - if not value: continue - - m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) - if m is not None: - if hostIP is None: - try: - hostIP = socket.gethostbyname(hostonly) - hostIP = ip2num(hostIP) - except socket.error: - continue - - base = ip2num(m.group(1)) - mask = m.group(2) - if mask is None: - mask = 8 * (m.group(1).count('.') + 1) - - else: - mask = int(mask[1:]) - mask = 32 - mask - - if (hostIP >> mask) == (base >> mask): - return True - - elif fnmatch(host, value): - return True - - return False - - def getproxies_macosx_sysconf(): - """Return a dictionary of scheme -> proxy server URL mappings. - - This function uses the MacOSX framework SystemConfiguration - to fetch the proxy information. - """ - return _get_proxies() - - def proxy_bypass(host): - """Return True, if a host should be bypassed. - - Checks proxy settings gathered from the environment, if specified, or - from the MacOSX framework SystemConfiguration. - """ - proxies = getproxies_environment() - if proxies: - return proxy_bypass_environment(host, proxies) - else: - return proxy_bypass_macosx_sysconf(host) - - def getproxies(): - return getproxies_environment() or getproxies_macosx_sysconf() - -elif os.name == 'nt': - def getproxies_registry(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Win32 uses the registry to store proxies. 
- - """ - proxies = {} - try: - import _winreg - except ImportError: - # Std module, so should be around - but you never know! - return proxies - try: - internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = _winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] - if proxyEnable: - # Returned as Unicode but problems if not converted to ASCII - proxyServer = str(_winreg.QueryValueEx(internetSettings, - 'ProxyServer')[0]) - if '=' in proxyServer: - # Per-protocol settings - for p in proxyServer.split(';'): - protocol, address = p.split('=', 1) - # See if address has a type:// prefix - import re - if not re.match('^([^/:]+)://', address): - address = '%s://%s' % (protocol, address) - proxies[protocol] = address - else: - # Use one setting for all protocols - if proxyServer[:5] == 'http:': - proxies['http'] = proxyServer - else: - proxies['http'] = 'http://%s' % proxyServer - proxies['https'] = 'https://%s' % proxyServer - proxies['ftp'] = 'ftp://%s' % proxyServer - internetSettings.Close() - except (WindowsError, ValueError, TypeError): - # Either registry key not found etc, or the value in an - # unexpected format. - # proxies already set up to be empty so nothing to do - pass - return proxies - - def getproxies(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Returns settings gathered from the environment, if specified, - or the registry. - - """ - return getproxies_environment() or getproxies_registry() - - def proxy_bypass_registry(host): - try: - import _winreg - import re - except ImportError: - # Std modules, so should be around - but you never know! 
- return 0 - try: - internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = _winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] - proxyOverride = str(_winreg.QueryValueEx(internetSettings, - 'ProxyOverride')[0]) - # ^^^^ Returned as Unicode but problems if not converted to ASCII - except WindowsError: - return 0 - if not proxyEnable or not proxyOverride: - return 0 - # try to make a host list from name and IP address. - rawHost, port = splitport(host) - host = [rawHost] - try: - addr = socket.gethostbyname(rawHost) - if addr != rawHost: - host.append(addr) - except socket.error: - pass - try: - fqdn = socket.getfqdn(rawHost) - if fqdn != rawHost: - host.append(fqdn) - except socket.error: - pass - # make a check value list from the registry entry: replace the - # '' string by the localhost entry and the corresponding - # canonical entry. - proxyOverride = proxyOverride.split(';') - # now check if we match one of the registry values. - for test in proxyOverride: - if test == '': - if '.' not in rawHost: - return 1 - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char - for val in host: - # print "%s <--> %s" %( test, val ) - if re.match(test, val, re.I): - return 1 - return 0 - - def proxy_bypass(host): - """Return True, if the host should be bypassed. - - Checks proxy settings gathered from the environment, if specified, - or the registry. 
- """ - proxies = getproxies_environment() - if proxies: - return proxy_bypass_environment(host, proxies) - else: - return proxy_bypass_registry(host) - -else: - # By default use environment variables - getproxies = getproxies_environment - proxy_bypass = proxy_bypass_environment - -# Test and time quote() and unquote() -def test1(): - s = '' - for i in range(256): s = s + chr(i) - s = s*4 - t0 = time.time() - qs = quote(s) - uqs = unquote(qs) - t1 = time.time() - if uqs != s: - print 'Wrong!' - print repr(s) - print repr(qs) - print repr(uqs) - print round(t1 - t0, 3), 'sec' - - -def reporthook(blocknum, blocksize, totalsize): - # Report during remote transfers - print "Block number: %d, Block size: %d, Total size: %d" % ( - blocknum, blocksize, totalsize) diff --git a/python/Lib/urllib2.py b/python/Lib/urllib2.py deleted file mode 100755 index 8b634ada37..0000000000 --- a/python/Lib/urllib2.py +++ /dev/null @@ -1,1489 +0,0 @@ -"""An extensible library for opening URLs using a variety of protocols - -The simplest way to use this module is to call the urlopen function, -which accepts a string containing a URL or a Request object (described -below). It opens the URL and returns the results as file-like -object; the returned object has some extra methods described below. - -The OpenerDirector manages a collection of Handler objects that do -all the actual work. Each Handler implements a particular protocol or -option. The OpenerDirector is a composite object that invokes the -Handlers needed to open the requested URL. For example, the -HTTPHandler performs HTTP GET and POST requests and deals with -non-error returns. The HTTPRedirectHandler automatically deals with -HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler -deals with digest authentication. - -urlopen(url, data=None) -- Basic usage is the same as original -urllib. pass the url and optionally data to post to an HTTP URL, and -get a file-like object back. 
One difference is that you can also pass -a Request instance instead of URL. Raises a URLError (subclass of -IOError); for HTTP errors, raises an HTTPError, which can also be -treated as a valid response. - -build_opener -- Function that creates a new OpenerDirector instance. -Will install the default handlers. Accepts one or more Handlers as -arguments, either instances or Handler classes that it will -instantiate. If one of the argument is a subclass of the default -handler, the argument will be installed instead of the default. - -install_opener -- Installs a new opener as the default opener. - -objects of interest: - -OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages -the Handler classes, while dealing with requests and responses. - -Request -- An object that encapsulates the state of a request. The -state can be as simple as the URL. It can also include extra HTTP -headers, e.g. a User-Agent. - -BaseHandler -- - -exceptions: -URLError -- A subclass of IOError, individual protocols have their own -specific subclass. - -HTTPError -- Also a valid HTTP response, so you can treat an HTTP error -as an exceptional event or valid response. - -internals: -BaseHandler and parent -_call_chain conventions - -Example usage: - -import urllib2 - -# set up authentication info -authinfo = urllib2.HTTPBasicAuthHandler() -authinfo.add_password(realm='PDQ Application', - uri='https://mahler:8092/site-updates.py', - user='klem', - passwd='geheim$parole') - -proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"}) - -# build a new opener that adds authentication and caching FTP handlers -opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler) - -# install it -urllib2.install_opener(opener) - -f = urllib2.urlopen('http://www.python.org/') - - -""" - -# XXX issues: -# If an authentication error handler that tries to perform -# authentication for some reason but fails, how should the error be -# signalled? 
The client needs to know the HTTP error code. But if -# the handler knows that the problem was, e.g., that it didn't know -# that hash algo that requested in the challenge, it would be good to -# pass that information along to the client, too. -# ftp errors aren't handled cleanly -# check digest against correct (i.e. non-apache) implementation - -# Possible extensions: -# complex proxies XXX not sure what exactly was meant by this -# abstract factory for opener - -import base64 -import hashlib -import httplib -import mimetools -import os -import posixpath -import random -import re -import socket -import sys -import time -import urlparse -import bisect -import warnings - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -# check for SSL -try: - import ssl -except ImportError: - _have_ssl = False -else: - _have_ssl = True - -from urllib import (unwrap, unquote, splittype, splithost, quote, - addinfourl, splitport, splittag, toBytes, - splitattr, ftpwrapper, splituser, splitpasswd, splitvalue) - -# support for FileHandler, proxies via environment variables -from urllib import localhost, url2pathname, getproxies, proxy_bypass - -# used in User-Agent header sent -__version__ = sys.version[:3] - -_opener = None -def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - cafile=None, capath=None, cadefault=False, context=None): - global _opener - if cafile or capath or cadefault: - if context is not None: - raise ValueError( - "You can't pass both context and any of cafile, capath, and " - "cadefault" - ) - if not _have_ssl: - raise ValueError('SSL support not available') - context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, - cafile=cafile, - capath=capath) - https_handler = HTTPSHandler(context=context) - opener = build_opener(https_handler) - elif context: - https_handler = HTTPSHandler(context=context) - opener = build_opener(https_handler) - elif _opener is None: - _opener = opener = 
build_opener() - else: - opener = _opener - return opener.open(url, data, timeout) - -def install_opener(opener): - global _opener - _opener = opener - -# do these error classes make sense? -# make sure all of the IOError stuff is overridden. we just want to be -# subtypes. - -class URLError(IOError): - # URLError is a sub-type of IOError, but it doesn't share any of - # the implementation. need to override __init__ and __str__. - # It sets self.args for compatibility with other EnvironmentError - # subclasses, but args doesn't have the typical format with errno in - # slot 0 and strerror in slot 1. This may be better than nothing. - def __init__(self, reason): - self.args = reason, - self.reason = reason - - def __str__(self): - return '' % self.reason - -class HTTPError(URLError, addinfourl): - """Raised when HTTP error occurs, but also acts like non-error return""" - __super_init = addinfourl.__init__ - - def __init__(self, url, code, msg, hdrs, fp): - self.code = code - self.msg = msg - self.hdrs = hdrs - self.fp = fp - self.filename = url - # The addinfourl classes depend on fp being a valid file - # object. In some cases, the HTTPError may not have a valid - # file object. If this happens, the simplest workaround is to - # not initialize the base classes. - if fp is not None: - self.__super_init(fp, hdrs, url, code) - - def __str__(self): - return 'HTTP Error %s: %s' % (self.code, self.msg) - - # since URLError specifies a .reason attribute, HTTPError should also - # provide this attribute. See issue13211 fo discussion. - @property - def reason(self): - return self.msg - - def info(self): - return self.hdrs - -# copied from cookielib.py -_cut_port_re = re.compile(r":\d+$") -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. 
- - """ - url = request.get_full_url() - host = urlparse.urlparse(url)[1] - if host == "": - host = request.get_header("Host", "") - - # remove port, if present - host = _cut_port_re.sub("", host, 1) - return host.lower() - -class Request: - - def __init__(self, url, data=None, headers={}, - origin_req_host=None, unverifiable=False): - # unwrap('') --> 'type://host/path' - self.__original = unwrap(url) - self.__original, self.__fragment = splittag(self.__original) - self.type = None - # self.__r_type is what's left after doing the splittype - self.host = None - self.port = None - self._tunnel_host = None - self.data = data - self.headers = {} - for key, value in headers.items(): - self.add_header(key, value) - self.unredirected_hdrs = {} - if origin_req_host is None: - origin_req_host = request_host(self) - self.origin_req_host = origin_req_host - self.unverifiable = unverifiable - - def __getattr__(self, attr): - # XXX this is a fallback mechanism to guard against these - # methods getting called in a non-standard order. this may be - # too complicated and/or unnecessary. - # XXX should the __r_XXX attributes be public? 
- if attr in ('_Request__r_type', '_Request__r_host'): - getattr(self, 'get_' + attr[12:])() - return self.__dict__[attr] - raise AttributeError, attr - - def get_method(self): - if self.has_data(): - return "POST" - else: - return "GET" - - # XXX these helper methods are lame - - def add_data(self, data): - self.data = data - - def has_data(self): - return self.data is not None - - def get_data(self): - return self.data - - def get_full_url(self): - if self.__fragment: - return '%s#%s' % (self.__original, self.__fragment) - else: - return self.__original - - def get_type(self): - if self.type is None: - self.type, self.__r_type = splittype(self.__original) - if self.type is None: - raise ValueError, "unknown url type: %s" % self.__original - return self.type - - def get_host(self): - if self.host is None: - self.host, self.__r_host = splithost(self.__r_type) - if self.host: - self.host = unquote(self.host) - return self.host - - def get_selector(self): - return self.__r_host - - def set_proxy(self, host, type): - if self.type == 'https' and not self._tunnel_host: - self._tunnel_host = self.host - else: - self.type = type - self.__r_host = self.__original - - self.host = host - - def has_proxy(self): - return self.__r_host == self.__original - - def get_origin_req_host(self): - return self.origin_req_host - - def is_unverifiable(self): - return self.unverifiable - - def add_header(self, key, val): - # useful for something like authentication - self.headers[key.capitalize()] = val - - def add_unredirected_header(self, key, val): - # will not be added to a redirected request - self.unredirected_hdrs[key.capitalize()] = val - - def has_header(self, header_name): - return (header_name in self.headers or - header_name in self.unredirected_hdrs) - - def get_header(self, header_name, default=None): - return self.headers.get( - header_name, - self.unredirected_hdrs.get(header_name, default)) - - def header_items(self): - hdrs = self.unredirected_hdrs.copy() - 
hdrs.update(self.headers) - return hdrs.items() - -class OpenerDirector: - def __init__(self): - client_version = "Python-urllib/%s" % __version__ - self.addheaders = [('User-agent', client_version)] - # self.handlers is retained only for backward compatibility - self.handlers = [] - # manage the individual handlers - self.handle_open = {} - self.handle_error = {} - self.process_response = {} - self.process_request = {} - - def add_handler(self, handler): - if not hasattr(handler, "add_parent"): - raise TypeError("expected BaseHandler instance, got %r" % - type(handler)) - - added = False - for meth in dir(handler): - if meth in ["redirect_request", "do_open", "proxy_open"]: - # oops, coincidental match - continue - - i = meth.find("_") - protocol = meth[:i] - condition = meth[i+1:] - - if condition.startswith("error"): - j = condition.find("_") + i + 1 - kind = meth[j+1:] - try: - kind = int(kind) - except ValueError: - pass - lookup = self.handle_error.get(protocol, {}) - self.handle_error[protocol] = lookup - elif condition == "open": - kind = protocol - lookup = self.handle_open - elif condition == "response": - kind = protocol - lookup = self.process_response - elif condition == "request": - kind = protocol - lookup = self.process_request - else: - continue - - handlers = lookup.setdefault(kind, []) - if handlers: - bisect.insort(handlers, handler) - else: - handlers.append(handler) - added = True - - if added: - bisect.insort(self.handlers, handler) - handler.add_parent(self) - - def close(self): - # Only exists for backwards compatibility. - pass - - def _call_chain(self, chain, kind, meth_name, *args): - # Handlers raise an exception if no one else should try to handle - # the request, or return None if they can't but another handler - # could. Otherwise, they return the response. 
- handlers = chain.get(kind, ()) - for handler in handlers: - func = getattr(handler, meth_name) - - result = func(*args) - if result is not None: - return result - - def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - # accept a URL or a Request object - if isinstance(fullurl, basestring): - req = Request(fullurl, data) - else: - req = fullurl - if data is not None: - req.add_data(data) - - req.timeout = timeout - protocol = req.get_type() - - # pre-process request - meth_name = protocol+"_request" - for processor in self.process_request.get(protocol, []): - meth = getattr(processor, meth_name) - req = meth(req) - - response = self._open(req, data) - - # post-process response - meth_name = protocol+"_response" - for processor in self.process_response.get(protocol, []): - meth = getattr(processor, meth_name) - response = meth(req, response) - - return response - - def _open(self, req, data=None): - result = self._call_chain(self.handle_open, 'default', - 'default_open', req) - if result: - return result - - protocol = req.get_type() - result = self._call_chain(self.handle_open, protocol, protocol + - '_open', req) - if result: - return result - - return self._call_chain(self.handle_open, 'unknown', - 'unknown_open', req) - - def error(self, proto, *args): - if proto in ('http', 'https'): - # XXX http[s] protocols are special-cased - dict = self.handle_error['http'] # https is not different than http - proto = args[2] # YUCK! 
- meth_name = 'http_error_%s' % proto - http_err = 1 - orig_args = args - else: - dict = self.handle_error - meth_name = proto + '_error' - http_err = 0 - args = (dict, proto, meth_name) + args - result = self._call_chain(*args) - if result: - return result - - if http_err: - args = (dict, 'default', 'http_error_default') + orig_args - return self._call_chain(*args) - -# XXX probably also want an abstract factory that knows when it makes -# sense to skip a superclass in favor of a subclass and when it might -# make sense to include both - -def build_opener(*handlers): - """Create an opener object from a list of handlers. - - The opener will use several default handlers, including support - for HTTP, FTP and when applicable, HTTPS. - - If any of the handlers passed as arguments are subclasses of the - default handlers, the default handlers will not be used. - """ - import types - def isclass(obj): - return isinstance(obj, (types.ClassType, type)) - - opener = OpenerDirector() - default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, - HTTPDefaultErrorHandler, HTTPRedirectHandler, - FTPHandler, FileHandler, HTTPErrorProcessor] - if hasattr(httplib, 'HTTPS'): - default_classes.append(HTTPSHandler) - skip = set() - for klass in default_classes: - for check in handlers: - if isclass(check): - if issubclass(check, klass): - skip.add(klass) - elif isinstance(check, klass): - skip.add(klass) - for klass in skip: - default_classes.remove(klass) - - for klass in default_classes: - opener.add_handler(klass()) - - for h in handlers: - if isclass(h): - h = h() - opener.add_handler(h) - return opener - -class BaseHandler: - handler_order = 500 - - def add_parent(self, parent): - self.parent = parent - - def close(self): - # Only exists for backwards compatibility - pass - - def __lt__(self, other): - if not hasattr(other, "handler_order"): - # Try to preserve the old behavior of having custom classes - # inserted after default ones (works only for custom user - # classes 
which are not aware of handler_order). - return True - return self.handler_order < other.handler_order - - -class HTTPErrorProcessor(BaseHandler): - """Process HTTP error responses.""" - handler_order = 1000 # after all other processing - - def http_response(self, request, response): - code, msg, hdrs = response.code, response.msg, response.info() - - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if not (200 <= code < 300): - response = self.parent.error( - 'http', request, response, code, msg, hdrs) - - return response - - https_response = http_response - -class HTTPDefaultErrorHandler(BaseHandler): - def http_error_default(self, req, fp, code, msg, hdrs): - raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) - -class HTTPRedirectHandler(BaseHandler): - # maximum number of redirections to any single URL - # this is needed because of the state that cookies introduce - max_repeats = 4 - # maximum total number of redirections (regardless of URL) before - # assuming we're in a loop - max_redirections = 10 - - def redirect_request(self, req, fp, code, msg, headers, newurl): - """Return a Request or None in response to a redirect. - - This is called by the http_error_30x methods when a - redirection response is received. If a redirection should - take place, return a new Request to allow http_error_30x to - perform the redirect. Otherwise, raise HTTPError if no-one - else should try to handle this url. Return None if you can't - but another Handler might. - """ - m = req.get_method() - if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") - or code in (301, 302, 303) and m == "POST"): - # Strictly (according to RFC 2616), 301 or 302 in response - # to a POST MUST NOT cause a redirection without confirmation - # from the user (of urllib2, in this case). In practice, - # essentially all clients do redirect in this case, so we - # do the same. 
- # be conciliant with URIs containing a space - newurl = newurl.replace(' ', '%20') - newheaders = dict((k,v) for k,v in req.headers.items() - if k.lower() not in ("content-length", "content-type") - ) - return Request(newurl, - headers=newheaders, - origin_req_host=req.get_origin_req_host(), - unverifiable=True) - else: - raise HTTPError(req.get_full_url(), code, msg, headers, fp) - - # Implementation note: To avoid the server sending us into an - # infinite loop, the request object needs to track what URLs we - # have already seen. Do this by adding a handler-specific - # attribute to the Request object. - def http_error_302(self, req, fp, code, msg, headers): - # Some servers (incorrectly) return multiple Location headers - # (so probably same goes for URI). Use first header. - if 'location' in headers: - newurl = headers.getheaders('location')[0] - elif 'uri' in headers: - newurl = headers.getheaders('uri')[0] - else: - return - - # fix a possible malformed URL - urlparts = urlparse.urlparse(newurl) - if not urlparts.path and urlparts.netloc: - urlparts = list(urlparts) - urlparts[2] = "/" - newurl = urlparse.urlunparse(urlparts) - - newurl = urlparse.urljoin(req.get_full_url(), newurl) - - # For security reasons we do not allow redirects to protocols - # other than HTTP, HTTPS or FTP. - newurl_lower = newurl.lower() - if not (newurl_lower.startswith('http://') or - newurl_lower.startswith('https://') or - newurl_lower.startswith('ftp://')): - raise HTTPError(newurl, code, - msg + " - Redirection to url '%s' is not allowed" % - newurl, - headers, fp) - - # XXX Probably want to forget about the state of the current - # request, although that might interact poorly with other - # handlers that also use handler-specific request attributes - new = self.redirect_request(req, fp, code, msg, headers, newurl) - if new is None: - return - - # loop detection - # .redirect_dict has a key url if url was previously visited. 
- if hasattr(req, 'redirect_dict'): - visited = new.redirect_dict = req.redirect_dict - if (visited.get(newurl, 0) >= self.max_repeats or - len(visited) >= self.max_redirections): - raise HTTPError(req.get_full_url(), code, - self.inf_msg + msg, headers, fp) - else: - visited = new.redirect_dict = req.redirect_dict = {} - visited[newurl] = visited.get(newurl, 0) + 1 - - # Don't close the fp until we are sure that we won't use it - # with HTTPError. - fp.read() - fp.close() - - return self.parent.open(new, timeout=req.timeout) - - http_error_301 = http_error_303 = http_error_307 = http_error_302 - - inf_msg = "The HTTP server returned a redirect error that would " \ - "lead to an infinite loop.\n" \ - "The last 30x error message was:\n" - - -def _parse_proxy(proxy): - """Return (scheme, user, password, host/port) given a URL or an authority. - - If a URL is supplied, it must have an authority (host:port) component. - According to RFC 3986, having an authority component means the URL must - have two slashes after the scheme: - - >>> _parse_proxy('file:/ftp.example.com/') - Traceback (most recent call last): - ValueError: proxy URL with no authority: 'file:/ftp.example.com/' - - The first three items of the returned tuple may be None. 
- - Examples of authority parsing: - - >>> _parse_proxy('proxy.example.com') - (None, None, None, 'proxy.example.com') - >>> _parse_proxy('proxy.example.com:3128') - (None, None, None, 'proxy.example.com:3128') - - The authority component may optionally include userinfo (assumed to be - username:password): - - >>> _parse_proxy('joe:password@proxy.example.com') - (None, 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('joe:password@proxy.example.com:3128') - (None, 'joe', 'password', 'proxy.example.com:3128') - - Same examples, but with URLs instead: - - >>> _parse_proxy('http://proxy.example.com/') - ('http', None, None, 'proxy.example.com') - >>> _parse_proxy('http://proxy.example.com:3128/') - ('http', None, None, 'proxy.example.com:3128') - >>> _parse_proxy('http://joe:password@proxy.example.com/') - ('http', 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('http://joe:password@proxy.example.com:3128') - ('http', 'joe', 'password', 'proxy.example.com:3128') - - Everything after the authority is ignored: - - >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') - ('ftp', 'joe', 'password', 'proxy.example.com') - - Test for no trailing '/' case: - - >>> _parse_proxy('http://joe:password@proxy.example.com') - ('http', 'joe', 'password', 'proxy.example.com') - - """ - scheme, r_scheme = splittype(proxy) - if not r_scheme.startswith("/"): - # authority - scheme = None - authority = proxy - else: - # URL - if not r_scheme.startswith("//"): - raise ValueError("proxy URL with no authority: %r" % proxy) - # We have an authority, so for RFC 3986-compliant URLs (by ss 3. 
- # and 3.3.), path is empty or starts with '/' - end = r_scheme.find("/", 2) - if end == -1: - end = None - authority = r_scheme[2:end] - userinfo, hostport = splituser(authority) - if userinfo is not None: - user, password = splitpasswd(userinfo) - else: - user = password = None - return scheme, user, password, hostport - -class ProxyHandler(BaseHandler): - # Proxies must be in front - handler_order = 100 - - def __init__(self, proxies=None): - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'has_key'), "proxies must be a mapping" - self.proxies = proxies - for type, url in proxies.items(): - setattr(self, '%s_open' % type, - lambda r, proxy=url, type=type, meth=self.proxy_open: \ - meth(r, proxy, type)) - - def proxy_open(self, req, proxy, type): - orig_type = req.get_type() - proxy_type, user, password, hostport = _parse_proxy(proxy) - - if proxy_type is None: - proxy_type = orig_type - - if req.host and proxy_bypass(req.host): - return None - - if user and password: - user_pass = '%s:%s' % (unquote(user), unquote(password)) - creds = base64.b64encode(user_pass).strip() - req.add_header('Proxy-authorization', 'Basic ' + creds) - hostport = unquote(hostport) - req.set_proxy(hostport, proxy_type) - - if orig_type == proxy_type or orig_type == 'https': - # let other handlers take care of it - return None - else: - # need to start over, because the other handlers don't - # grok the proxy's URL type - # e.g. 
if we have a constructor arg proxies like so: - # {'http': 'ftp://proxy.example.com'}, we may end up turning - # a request for http://acme.example.com/a into one for - # ftp://proxy.example.com/a - return self.parent.open(req, timeout=req.timeout) - -class HTTPPasswordMgr: - - def __init__(self): - self.passwd = {} - - def add_password(self, realm, uri, user, passwd): - # uri could be a single URI or a sequence - if isinstance(uri, basestring): - uri = [uri] - if not realm in self.passwd: - self.passwd[realm] = {} - for default_port in True, False: - reduced_uri = tuple( - [self.reduce_uri(u, default_port) for u in uri]) - self.passwd[realm][reduced_uri] = (user, passwd) - - def find_user_password(self, realm, authuri): - domains = self.passwd.get(realm, {}) - for default_port in True, False: - reduced_authuri = self.reduce_uri(authuri, default_port) - for uris, authinfo in domains.iteritems(): - for uri in uris: - if self.is_suburi(uri, reduced_authuri): - return authinfo - return None, None - - def reduce_uri(self, uri, default_port=True): - """Accept authority or URI and extract only the authority and path.""" - # note HTTP URLs do not have a userinfo component - parts = urlparse.urlsplit(uri) - if parts[1]: - # URI - scheme = parts[0] - authority = parts[1] - path = parts[2] or '/' - else: - # host or host:port - scheme = None - authority = uri - path = '/' - host, port = splitport(authority) - if default_port and port is None and scheme is not None: - dport = {"http": 80, - "https": 443, - }.get(scheme) - if dport is not None: - authority = "%s:%d" % (host, dport) - return authority, path - - def is_suburi(self, base, test): - """Check if test is below base in a URI tree - - Both args must be URIs in reduced form. 
- """ - if base == test: - return True - if base[0] != test[0]: - return False - common = posixpath.commonprefix((base[1], test[1])) - if len(common) == len(base[1]): - return True - return False - - -class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): - - def find_user_password(self, realm, authuri): - user, password = HTTPPasswordMgr.find_user_password(self, realm, - authuri) - if user is not None: - return user, password - return HTTPPasswordMgr.find_user_password(self, None, authuri) - - -class AbstractBasicAuthHandler: - - # XXX this allows for multiple auth-schemes, but will stupidly pick - # the last one with a realm specified. - - # allow for double- and single-quoted realm values - # (single quotes are a violation of the RFC, but appear in the wild) - rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+' - 'realm=(["\']?)([^"\']*)\\2', re.I) - - # XXX could pre-emptively send auth info already accepted (RFC 2617, - # end of section 2, and section 1.2 immediately after "credentials" - # production). 
- - def __init__(self, password_mgr=None): - if password_mgr is None: - password_mgr = HTTPPasswordMgr() - self.passwd = password_mgr - self.add_password = self.passwd.add_password - - - def http_error_auth_reqed(self, authreq, host, req, headers): - # host may be an authority (without userinfo) or a URL with an - # authority - # XXX could be multiple headers - authreq = headers.get(authreq, None) - - if authreq: - mo = AbstractBasicAuthHandler.rx.search(authreq) - if mo: - scheme, quote, realm = mo.groups() - if quote not in ['"', "'"]: - warnings.warn("Basic Auth Realm was unquoted", - UserWarning, 2) - if scheme.lower() == 'basic': - return self.retry_http_basic_auth(host, req, realm) - - def retry_http_basic_auth(self, host, req, realm): - user, pw = self.passwd.find_user_password(realm, host) - if pw is not None: - raw = "%s:%s" % (user, pw) - auth = 'Basic %s' % base64.b64encode(raw).strip() - if req.get_header(self.auth_header, None) == auth: - return None - req.add_unredirected_header(self.auth_header, auth) - return self.parent.open(req, timeout=req.timeout) - else: - return None - - -class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - - auth_header = 'Authorization' - - def http_error_401(self, req, fp, code, msg, headers): - url = req.get_full_url() - response = self.http_error_auth_reqed('www-authenticate', - url, req, headers) - return response - - -class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - - auth_header = 'Proxy-authorization' - - def http_error_407(self, req, fp, code, msg, headers): - # http_error_auth_reqed requires that there is no userinfo component in - # authority. Assume there isn't one, since urllib2 does not (and - # should not, RFC 3986 s. 3.2.1) support requests for URLs containing - # userinfo. 
- authority = req.get_host() - response = self.http_error_auth_reqed('proxy-authenticate', - authority, req, headers) - return response - - -def randombytes(n): - """Return n random bytes.""" - # Use /dev/urandom if it is available. Fall back to random module - # if not. It might be worthwhile to extend this function to use - # other platform-specific mechanisms for getting random bytes. - if os.path.exists("/dev/urandom"): - f = open("/dev/urandom") - s = f.read(n) - f.close() - return s - else: - L = [chr(random.randrange(0, 256)) for i in range(n)] - return "".join(L) - -class AbstractDigestAuthHandler: - # Digest authentication is specified in RFC 2617. - - # XXX The client does not inspect the Authentication-Info header - # in a successful response. - - # XXX It should be possible to test this implementation against - # a mock server that just generates a static set of challenges. - - # XXX qop="auth-int" supports is shaky - - def __init__(self, passwd=None): - if passwd is None: - passwd = HTTPPasswordMgr() - self.passwd = passwd - self.add_password = self.passwd.add_password - self.retried = 0 - self.nonce_count = 0 - self.last_nonce = None - - def reset_retry_count(self): - self.retried = 0 - - def http_error_auth_reqed(self, auth_header, host, req, headers): - authreq = headers.get(auth_header, None) - if self.retried > 5: - # Don't fail endlessly - if we failed once, we'll probably - # fail a second time. Hm. Unless the Password Manager is - # prompting for the information. Crap. 
This isn't great - # but it's better than the current 'repeat until recursion - # depth exceeded' approach - raise HTTPError(req.get_full_url(), 401, "digest auth failed", - headers, None) - else: - self.retried += 1 - if authreq: - scheme = authreq.split()[0] - if scheme.lower() == 'digest': - return self.retry_http_digest_auth(req, authreq) - - def retry_http_digest_auth(self, req, auth): - token, challenge = auth.split(' ', 1) - chal = parse_keqv_list(parse_http_list(challenge)) - auth = self.get_authorization(req, chal) - if auth: - auth_val = 'Digest %s' % auth - if req.headers.get(self.auth_header, None) == auth_val: - return None - req.add_unredirected_header(self.auth_header, auth_val) - resp = self.parent.open(req, timeout=req.timeout) - return resp - - def get_cnonce(self, nonce): - # The cnonce-value is an opaque - # quoted string value provided by the client and used by both client - # and server to avoid chosen plaintext attacks, to provide mutual - # authentication, and to provide some message integrity protection. - # This isn't a fabulous effort, but it's probably Good Enough. 
- dig = hashlib.sha1("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(), - randombytes(8))).hexdigest() - return dig[:16] - - def get_authorization(self, req, chal): - try: - realm = chal['realm'] - nonce = chal['nonce'] - qop = chal.get('qop') - algorithm = chal.get('algorithm', 'MD5') - # mod_digest doesn't send an opaque, even though it isn't - # supposed to be optional - opaque = chal.get('opaque', None) - except KeyError: - return None - - H, KD = self.get_algorithm_impls(algorithm) - if H is None: - return None - - user, pw = self.passwd.find_user_password(realm, req.get_full_url()) - if user is None: - return None - - # XXX not implemented yet - if req.has_data(): - entdig = self.get_entity_digest(req.get_data(), chal) - else: - entdig = None - - A1 = "%s:%s:%s" % (user, realm, pw) - A2 = "%s:%s" % (req.get_method(), - # XXX selector: what about proxies and full urls - req.get_selector()) - if qop == 'auth': - if nonce == self.last_nonce: - self.nonce_count += 1 - else: - self.nonce_count = 1 - self.last_nonce = nonce - - ncvalue = '%08x' % self.nonce_count - cnonce = self.get_cnonce(nonce) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) - respdig = KD(H(A1), noncebit) - elif qop is None: - respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) - else: - # XXX handle auth-int. - raise URLError("qop '%s' is not supported." % qop) - - # XXX should the partial digests be encoded too? 
- - base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (user, realm, nonce, req.get_selector(), - respdig) - if opaque: - base += ', opaque="%s"' % opaque - if entdig: - base += ', digest="%s"' % entdig - base += ', algorithm="%s"' % algorithm - if qop: - base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) - return base - - def get_algorithm_impls(self, algorithm): - # algorithm should be case-insensitive according to RFC2617 - algorithm = algorithm.upper() - # lambdas assume digest modules are imported at the top level - if algorithm == 'MD5': - H = lambda x: hashlib.md5(x).hexdigest() - elif algorithm == 'SHA': - H = lambda x: hashlib.sha1(x).hexdigest() - # XXX MD5-sess - else: - raise ValueError("Unsupported digest authentication " - "algorithm %r" % algorithm.lower()) - KD = lambda s, d: H("%s:%s" % (s, d)) - return H, KD - - def get_entity_digest(self, data, chal): - # XXX not implemented yet - return None - - -class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): - """An authentication protocol defined by RFC 2069 - - Digest authentication improves on basic authentication because it - does not transmit passwords in the clear. 
- """ - - auth_header = 'Authorization' - handler_order = 490 # before Basic auth - - def http_error_401(self, req, fp, code, msg, headers): - host = urlparse.urlparse(req.get_full_url())[1] - retry = self.http_error_auth_reqed('www-authenticate', - host, req, headers) - self.reset_retry_count() - return retry - - -class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): - - auth_header = 'Proxy-Authorization' - handler_order = 490 # before Basic auth - - def http_error_407(self, req, fp, code, msg, headers): - host = req.get_host() - retry = self.http_error_auth_reqed('proxy-authenticate', - host, req, headers) - self.reset_retry_count() - return retry - -class AbstractHTTPHandler(BaseHandler): - - def __init__(self, debuglevel=0): - self._debuglevel = debuglevel - - def set_http_debuglevel(self, level): - self._debuglevel = level - - def do_request_(self, request): - host = request.get_host() - if not host: - raise URLError('no host given') - - if request.has_data(): # POST - data = request.get_data() - if not request.has_header('Content-type'): - request.add_unredirected_header( - 'Content-type', - 'application/x-www-form-urlencoded') - if not request.has_header('Content-length'): - request.add_unredirected_header( - 'Content-length', '%d' % len(data)) - - sel_host = host - if request.has_proxy(): - scheme, sel = splittype(request.get_selector()) - sel_host, sel_path = splithost(sel) - - if not request.has_header('Host'): - request.add_unredirected_header('Host', sel_host) - for name, value in self.parent.addheaders: - name = name.capitalize() - if not request.has_header(name): - request.add_unredirected_header(name, value) - - return request - - def do_open(self, http_class, req, **http_conn_args): - """Return an addinfourl object for the request, using http_class. - - http_class must implement the HTTPConnection API from httplib. - The addinfourl return value is a file-like object. 
It also - has methods and attributes including: - - info(): return a mimetools.Message object for the headers - - geturl(): return the original request URL - - code: HTTP status code - """ - host = req.get_host() - if not host: - raise URLError('no host given') - - # will parse host:port - h = http_class(host, timeout=req.timeout, **http_conn_args) - h.set_debuglevel(self._debuglevel) - - headers = dict(req.unredirected_hdrs) - headers.update(dict((k, v) for k, v in req.headers.items() - if k not in headers)) - - # We want to make an HTTP/1.1 request, but the addinfourl - # class isn't prepared to deal with a persistent connection. - # It will try to read all remaining data from the socket, - # which will block while the server waits for the next request. - # So make sure the connection gets closed after the (only) - # request. - headers["Connection"] = "close" - headers = dict( - (name.title(), val) for name, val in headers.items()) - - if req._tunnel_host: - tunnel_headers = {} - proxy_auth_hdr = "Proxy-Authorization" - if proxy_auth_hdr in headers: - tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] - # Proxy-Authorization should not be sent to origin - # server. - del headers[proxy_auth_hdr] - h.set_tunnel(req._tunnel_host, headers=tunnel_headers) - - try: - h.request(req.get_method(), req.get_selector(), req.data, headers) - except socket.error, err: # XXX what error? - h.close() - raise URLError(err) - else: - try: - r = h.getresponse(buffering=True) - except TypeError: # buffering kw not supported - r = h.getresponse() - - # Pick apart the HTTPResponse object to get the addinfourl - # object initialized properly. - - # Wrap the HTTPResponse object in socket's file object adapter - # for Windows. That adapter calls recv(), so delegate recv() - # to read(). This weird wrapping allows the returned object to - # have readline() and readlines() methods. 
- - # XXX It might be better to extract the read buffering code - # out of socket._fileobject() and into a base class. - - r.recv = r.read - fp = socket._fileobject(r, close=True) - - resp = addinfourl(fp, r.msg, req.get_full_url()) - resp.code = r.status - resp.msg = r.reason - return resp - - -class HTTPHandler(AbstractHTTPHandler): - - def http_open(self, req): - return self.do_open(httplib.HTTPConnection, req) - - http_request = AbstractHTTPHandler.do_request_ - -if hasattr(httplib, 'HTTPS'): - class HTTPSHandler(AbstractHTTPHandler): - - def __init__(self, debuglevel=0, context=None): - AbstractHTTPHandler.__init__(self, debuglevel) - self._context = context - - def https_open(self, req): - return self.do_open(httplib.HTTPSConnection, req, - context=self._context) - - https_request = AbstractHTTPHandler.do_request_ - -class HTTPCookieProcessor(BaseHandler): - def __init__(self, cookiejar=None): - import cookielib - if cookiejar is None: - cookiejar = cookielib.CookieJar() - self.cookiejar = cookiejar - - def http_request(self, request): - self.cookiejar.add_cookie_header(request) - return request - - def http_response(self, request, response): - self.cookiejar.extract_cookies(response, request) - return response - - https_request = http_request - https_response = http_response - -class UnknownHandler(BaseHandler): - def unknown_open(self, req): - type = req.get_type() - raise URLError('unknown url type: %s' % type) - -def parse_keqv_list(l): - """Parse list of key=value strings where keys are not duplicated.""" - parsed = {} - for elt in l: - k, v = elt.split('=', 1) - if v[0] == '"' and v[-1] == '"': - v = v[1:-1] - parsed[k] = v - return parsed - -def parse_http_list(s): - """Parse lists as described by RFC 2068 Section 2. - - In particular, parse comma-separated lists where the elements of - the list may include quoted-strings. A quoted-string could - contain a comma. A non-quoted string could have quotes in the - middle. 
Neither commas nor quotes count if they are escaped. - Only double-quotes count, not single-quotes. - """ - res = [] - part = '' - - escape = quote = False - for cur in s: - if escape: - part += cur - escape = False - continue - if quote: - if cur == '\\': - escape = True - continue - elif cur == '"': - quote = False - part += cur - continue - - if cur == ',': - res.append(part) - part = '' - continue - - if cur == '"': - quote = True - - part += cur - - # append last part - if part: - res.append(part) - - return [part.strip() for part in res] - -def _safe_gethostbyname(host): - try: - return socket.gethostbyname(host) - except socket.gaierror: - return None - -class FileHandler(BaseHandler): - # Use local file or FTP depending on form of URL - def file_open(self, req): - url = req.get_selector() - if url[:2] == '//' and url[2:3] != '/' and (req.host and - req.host != 'localhost'): - req.type = 'ftp' - return self.parent.open(req) - else: - return self.open_local_file(req) - - # names for the localhost - names = None - def get_names(self): - if FileHandler.names is None: - try: - FileHandler.names = tuple( - socket.gethostbyname_ex('localhost')[2] + - socket.gethostbyname_ex(socket.gethostname())[2]) - except socket.gaierror: - FileHandler.names = (socket.gethostbyname('localhost'),) - return FileHandler.names - - # not entirely sure what the rules are here - def open_local_file(self, req): - import email.utils - import mimetypes - host = req.get_host() - filename = req.get_selector() - localfile = url2pathname(filename) - try: - stats = os.stat(localfile) - size = stats.st_size - modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(filename)[0] - headers = mimetools.Message(StringIO( - 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - if host: - host, port = splitport(host) - if not host or \ - (not port and _safe_gethostbyname(host) in self.get_names()): - if host: 
- origurl = 'file://' + host + filename - else: - origurl = 'file://' + filename - return addinfourl(open(localfile, 'rb'), headers, origurl) - except OSError, msg: - # urllib2 users shouldn't expect OSErrors coming from urlopen() - raise URLError(msg) - raise URLError('file not on local host') - -class FTPHandler(BaseHandler): - def ftp_open(self, req): - import ftplib - import mimetypes - host = req.get_host() - if not host: - raise URLError('ftp error: no host given') - host, port = splitport(host) - if port is None: - port = ftplib.FTP_PORT - else: - port = int(port) - - # username/password handling - user, host = splituser(host) - if user: - user, passwd = splitpasswd(user) - else: - passwd = None - host = unquote(host) - user = user or '' - passwd = passwd or '' - - try: - host = socket.gethostbyname(host) - except socket.error, msg: - raise URLError(msg) - path, attrs = splitattr(req.get_selector()) - dirs = path.split('/') - dirs = map(unquote, dirs) - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: - dirs = dirs[1:] - try: - fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) - type = file and 'I' or 'D' - for attr in attrs: - attr, value = splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - fp, retrlen = fw.retrfile(file, type) - headers = "" - mtype = mimetypes.guess_type(req.get_full_url())[0] - if mtype: - headers += "Content-type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-length: %d\n" % retrlen - sf = StringIO(headers) - headers = mimetools.Message(sf) - return addinfourl(fp, headers, req.get_full_url()) - except ftplib.all_errors, msg: - raise URLError, ('ftp error: %s' % msg), sys.exc_info()[2] - - def connect_ftp(self, user, passwd, host, port, dirs, timeout): - fw = ftpwrapper(user, passwd, host, port, dirs, timeout, - persistent=False) -## fw.ftp.set_debuglevel(1) - return fw - -class CacheFTPHandler(FTPHandler): - # 
XXX would be nice to have pluggable cache strategies - # XXX this stuff is definitely not thread safe - def __init__(self): - self.cache = {} - self.timeout = {} - self.soonest = 0 - self.delay = 60 - self.max_conns = 16 - - def setTimeout(self, t): - self.delay = t - - def setMaxConns(self, m): - self.max_conns = m - - def connect_ftp(self, user, passwd, host, port, dirs, timeout): - key = user, host, port, '/'.join(dirs), timeout - if key in self.cache: - self.timeout[key] = time.time() + self.delay - else: - self.cache[key] = ftpwrapper(user, passwd, host, port, dirs, timeout) - self.timeout[key] = time.time() + self.delay - self.check_cache() - return self.cache[key] - - def check_cache(self): - # first check for old ones - t = time.time() - if self.soonest <= t: - for k, v in self.timeout.items(): - if v < t: - self.cache[k].close() - del self.cache[k] - del self.timeout[k] - self.soonest = min(self.timeout.values()) - - # then check the size - if len(self.cache) == self.max_conns: - for k, v in self.timeout.items(): - if v == self.soonest: - del self.cache[k] - del self.timeout[k] - break - self.soonest = min(self.timeout.values()) - - def clear_cache(self): - for conn in self.cache.values(): - conn.close() - self.cache.clear() - self.timeout.clear() diff --git a/python/Lib/urlparse.py b/python/Lib/urlparse.py deleted file mode 100755 index 4cd3d6743a..0000000000 --- a/python/Lib/urlparse.py +++ /dev/null @@ -1,428 +0,0 @@ -"""Parse (absolute and relative) URLs. - -urlparse module is based upon the following RFC specifications. - -RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding -and L. Masinter, January 2005. - -RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter -and L.Masinter, December 1999. - -RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. -Berners-Lee, R. Fielding, and L. Masinter, August 1998. - -RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. 
Zwinski, July 1998. - -RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June -1995. - -RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. -McCahill, December 1994 - -RFC 3986 is considered the current standard and any future changes to -urlparse module should conform with it. The urlparse module is -currently not entirely compliant with this RFC due to defacto -scenarios for parsing, and for backward compatibility purposes, some -parsing quirks from older RFCs are retained. The testcases in -test_urlparse.py provides a good indicator of parsing behavior. - -""" - -import re - -__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", - "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"] - -# A classification of schemes ('' means apply by default) -uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', - 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', '', 'sftp', - 'svn', 'svn+ssh'] -uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', - 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', - 'svn', 'svn+ssh', 'sftp','nfs','git', 'git+ssh'] -uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', - 'mms', '', 'sftp', 'tel'] - -# These are not actually used anymore, but should stay for backwards -# compatibility. (They are undocumented, but have a public-looking name.) 
-non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] -uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] -uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] - -# Characters valid in scheme names -scheme_chars = ('abcdefghijklmnopqrstuvwxyz' - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - '0123456789' - '+-.') - -MAX_CACHE_SIZE = 20 -_parse_cache = {} - -def clear_cache(): - """Clear the parse cache.""" - _parse_cache.clear() - - -class ResultMixin(object): - """Shared methods for the parsed result objects.""" - - @property - def username(self): - netloc = self.netloc - if "@" in netloc: - userinfo = netloc.rsplit("@", 1)[0] - if ":" in userinfo: - userinfo = userinfo.split(":", 1)[0] - return userinfo - return None - - @property - def password(self): - netloc = self.netloc - if "@" in netloc: - userinfo = netloc.rsplit("@", 1)[0] - if ":" in userinfo: - return userinfo.split(":", 1)[1] - return None - - @property - def hostname(self): - netloc = self.netloc.split('@')[-1] - if '[' in netloc and ']' in netloc: - return netloc.split(']')[0][1:].lower() - elif ':' in netloc: - return netloc.split(':')[0].lower() - elif netloc == '': - return None - else: - return netloc.lower() - - @property - def port(self): - netloc = self.netloc.split('@')[-1].split(']')[-1] - if ':' in netloc: - port = netloc.split(':')[1] - if port: - port = int(port, 10) - # verify legal port - if (0 <= port <= 65535): - return port - return None - -from collections import namedtuple - -class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin): - - __slots__ = () - - def geturl(self): - return urlunsplit(self) - - -class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin): - - __slots__ = () - - def geturl(self): - return urlunparse(self) - - 
-def urlparse(url, scheme='', allow_fragments=True): - """Parse a URL into 6 components: - :///;?# - Return a 6-tuple: (scheme, netloc, path, params, query, fragment). - Note that we don't break the components up in smaller bits - (e.g. netloc is a single string) and we don't expand % escapes.""" - tuple = urlsplit(url, scheme, allow_fragments) - scheme, netloc, url, query, fragment = tuple - if scheme in uses_params and ';' in url: - url, params = _splitparams(url) - else: - params = '' - return ParseResult(scheme, netloc, url, params, query, fragment) - -def _splitparams(url): - if '/' in url: - i = url.find(';', url.rfind('/')) - if i < 0: - return url, '' - else: - i = url.find(';') - return url[:i], url[i+1:] - -def _splitnetloc(url, start=0): - delim = len(url) # position of end of domain part of url, default is end - for c in '/?#': # look for delimiters; the order is NOT important - wdelim = url.find(c, start) # find first of this delim - if wdelim >= 0: # if found - delim = min(delim, wdelim) # use earliest delim position - return url[start:delim], url[delim:] # return (domain, rest) - -def urlsplit(url, scheme='', allow_fragments=True): - """Parse a URL into 5 components: - :///?# - Return a 5-tuple: (scheme, netloc, path, query, fragment). - Note that we don't break the components up in smaller bits - (e.g. 
netloc is a single string) and we don't expand % escapes.""" - allow_fragments = bool(allow_fragments) - key = url, scheme, allow_fragments, type(url), type(scheme) - cached = _parse_cache.get(key, None) - if cached: - return cached - if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth - clear_cache() - netloc = query = fragment = '' - i = url.find(':') - if i > 0: - if url[:i] == 'http': # optimize the common case - scheme = url[:i].lower() - url = url[i+1:] - if url[:2] == '//': - netloc, url = _splitnetloc(url, 2) - if (('[' in netloc and ']' not in netloc) or - (']' in netloc and '[' not in netloc)): - raise ValueError("Invalid IPv6 URL") - if allow_fragments and '#' in url: - url, fragment = url.split('#', 1) - if '?' in url: - url, query = url.split('?', 1) - v = SplitResult(scheme, netloc, url, query, fragment) - _parse_cache[key] = v - return v - for c in url[:i]: - if c not in scheme_chars: - break - else: - # make sure "url" is not actually a port number (in which case - # "scheme" is really part of the path) - rest = url[i+1:] - if not rest or any(c not in '0123456789' for c in rest): - # not a port number - scheme, url = url[:i].lower(), rest - - if url[:2] == '//': - netloc, url = _splitnetloc(url, 2) - if (('[' in netloc and ']' not in netloc) or - (']' in netloc and '[' not in netloc)): - raise ValueError("Invalid IPv6 URL") - if allow_fragments and '#' in url: - url, fragment = url.split('#', 1) - if '?' in url: - url, query = url.split('?', 1) - v = SplitResult(scheme, netloc, url, query, fragment) - _parse_cache[key] = v - return v - -def urlunparse(data): - """Put a parsed URL back together again. This may result in a - slightly different, but equivalent URL, if the URL that was parsed - originally had redundant delimiters, e.g. a ? 
with an empty query - (the draft states that these are equivalent).""" - scheme, netloc, url, params, query, fragment = data - if params: - url = "%s;%s" % (url, params) - return urlunsplit((scheme, netloc, url, query, fragment)) - -def urlunsplit(data): - """Combine the elements of a tuple as returned by urlsplit() into a - complete URL as a string. The data argument can be any five-item iterable. - This may result in a slightly different, but equivalent URL, if the URL that - was parsed originally had unnecessary delimiters (for example, a ? with an - empty query; the RFC states that these are equivalent).""" - scheme, netloc, url, query, fragment = data - if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): - if url and url[:1] != '/': url = '/' + url - url = '//' + (netloc or '') + url - if scheme: - url = scheme + ':' + url - if query: - url = url + '?' + query - if fragment: - url = url + '#' + fragment - return url - -def urljoin(base, url, allow_fragments=True): - """Join a base URL and a possibly relative URL to form an absolute - interpretation of the latter.""" - if not base: - return url - if not url: - return base - bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ - urlparse(base, '', allow_fragments) - scheme, netloc, path, params, query, fragment = \ - urlparse(url, bscheme, allow_fragments) - if scheme != bscheme or scheme not in uses_relative: - return url - if scheme in uses_netloc: - if netloc: - return urlunparse((scheme, netloc, path, - params, query, fragment)) - netloc = bnetloc - if path[:1] == '/': - return urlunparse((scheme, netloc, path, - params, query, fragment)) - if not path and not params: - path = bpath - params = bparams - if not query: - query = bquery - return urlunparse((scheme, netloc, path, - params, query, fragment)) - segments = bpath.split('/')[:-1] + path.split('/') - # XXX The stuff below is bogus in various ways... - if segments[-1] == '.': - segments[-1] = '' - while '.' 
in segments: - segments.remove('.') - while 1: - i = 1 - n = len(segments) - 1 - while i < n: - if (segments[i] == '..' - and segments[i-1] not in ('', '..')): - del segments[i-1:i+1] - break - i = i+1 - else: - break - if segments == ['', '..']: - segments[-1] = '' - elif len(segments) >= 2 and segments[-1] == '..': - segments[-2:] = [''] - return urlunparse((scheme, netloc, '/'.join(segments), - params, query, fragment)) - -def urldefrag(url): - """Removes any existing fragment from URL. - - Returns a tuple of the defragmented URL and the fragment. If - the URL contained no fragments, the second element is the - empty string. - """ - if '#' in url: - s, n, p, a, q, frag = urlparse(url) - defrag = urlunparse((s, n, p, a, q, '')) - return defrag, frag - else: - return url, '' - -try: - unicode -except NameError: - def _is_unicode(x): - return 0 -else: - def _is_unicode(x): - return isinstance(x, unicode) - -# unquote method for parse_qs and parse_qsl -# Cannot use directly from urllib as it would create a circular reference -# because urllib uses urlparse methods (urljoin). If you update this function, -# update it also in urllib. This code duplication does not existin in Python3. 
- -_hexdig = '0123456789ABCDEFabcdef' -_hextochr = dict((a+b, chr(int(a+b,16))) - for a in _hexdig for b in _hexdig) -_asciire = re.compile('([\x00-\x7f]+)') - -def unquote(s): - """unquote('abc%20def') -> 'abc def'.""" - if _is_unicode(s): - if '%' not in s: - return s - bits = _asciire.split(s) - res = [bits[0]] - append = res.append - for i in range(1, len(bits), 2): - append(unquote(str(bits[i])).decode('latin1')) - append(bits[i + 1]) - return ''.join(res) - - bits = s.split('%') - # fastpath - if len(bits) == 1: - return s - res = [bits[0]] - append = res.append - for item in bits[1:]: - try: - append(_hextochr[item[:2]]) - append(item[2:]) - except KeyError: - append('%') - append(item) - return ''.join(res) - -def parse_qs(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: percent-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - percent-encoded queries should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - """ - dict = {} - for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): - if name in dict: - dict[name].append(value) - else: - dict[name] = [value] - return dict - -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: percent-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - percent-encoded queries should be treated as blank strings. A - true value indicates that blanks should be retained as blank - strings. 
The default false value indicates that blank values - are to be ignored and treated as if they were not included. - - strict_parsing: flag indicating what to do with parsing errors. If - false (the default), errors are silently ignored. If true, - errors raise a ValueError exception. - - Returns a list, as G-d intended. - """ - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError, "bad query field: %r" % (name_value,) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = unquote(nv[0].replace('+', ' ')) - value = unquote(nv[1].replace('+', ' ')) - r.append((name, value)) - - return r diff --git a/python/Lib/user.py b/python/Lib/user.py deleted file mode 100755 index 596f0a7462..0000000000 --- a/python/Lib/user.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Hook to allow user-specified customization code to run. - -As a policy, Python doesn't run user-specified code on startup of -Python programs (interactive sessions execute the script specified in -the PYTHONSTARTUP environment variable if it exists). - -However, some programs or sites may find it convenient to allow users -to have a standard customization file, which gets run when a program -requests it. This module implements such a mechanism. A program -that wishes to use the mechanism must execute the statement - - import user - -The user module looks for a file .pythonrc.py in the user's home -directory and if it can be opened, execfile()s it in its own global -namespace. Errors during this phase are not caught; that's up to the -program that imports the user module, if it wishes. - -The user's .pythonrc.py could conceivably test for sys.version if it -wishes to do different things depending on the Python version. 
- -""" -from warnings import warnpy3k -warnpy3k("the user module has been removed in Python 3.0", stacklevel=2) -del warnpy3k - -import os - -home = os.curdir # Default -if 'HOME' in os.environ: - home = os.environ['HOME'] -elif os.name == 'posix': - home = os.path.expanduser("~/") -elif os.name == 'nt': # Contributed by Jeff Bauer - if 'HOMEPATH' in os.environ: - if 'HOMEDRIVE' in os.environ: - home = os.environ['HOMEDRIVE'] + os.environ['HOMEPATH'] - else: - home = os.environ['HOMEPATH'] - -pythonrc = os.path.join(home, ".pythonrc.py") -try: - f = open(pythonrc) -except IOError: - pass -else: - f.close() - execfile(pythonrc) diff --git a/python/Lib/uu.py b/python/Lib/uu.py deleted file mode 100755 index f8fa4c4757..0000000000 --- a/python/Lib/uu.py +++ /dev/null @@ -1,196 +0,0 @@ -#! /usr/bin/env python - -# Copyright 1994 by Lance Ellinghouse -# Cathedral City, California Republic, United States of America. -# All Rights Reserved -# Permission to use, copy, modify, and distribute this software and its -# documentation for any purpose and without fee is hereby granted, -# provided that the above copyright notice appear in all copies and that -# both that copyright notice and this permission notice appear in -# supporting documentation, and that the name of Lance Ellinghouse -# not be used in advertising or publicity pertaining to distribution -# of the software without specific, written prior permission. -# LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO -# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -# FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-# -# Modified by Jack Jansen, CWI, July 1995: -# - Use binascii module to do the actual line-by-line conversion -# between ascii and binary. This results in a 1000-fold speedup. The C -# version is still 5 times faster, though. -# - Arguments more compliant with python standard - -"""Implementation of the UUencode and UUdecode functions. - -encode(in_file, out_file [,name, mode]) -decode(in_file [, out_file, mode]) -""" - -import binascii -import os -import sys - -__all__ = ["Error", "encode", "decode"] - -class Error(Exception): - pass - -def encode(in_file, out_file, name=None, mode=None): - """Uuencode file""" - # - # If in_file is a pathname open it and change defaults - # - opened_files = [] - try: - if in_file == '-': - in_file = sys.stdin - elif isinstance(in_file, basestring): - if name is None: - name = os.path.basename(in_file) - if mode is None: - try: - mode = os.stat(in_file).st_mode - except AttributeError: - pass - in_file = open(in_file, 'rb') - opened_files.append(in_file) - # - # Open out_file if it is a pathname - # - if out_file == '-': - out_file = sys.stdout - elif isinstance(out_file, basestring): - out_file = open(out_file, 'wb') - opened_files.append(out_file) - # - # Set defaults for name and mode - # - if name is None: - name = '-' - if mode is None: - mode = 0666 - # - # Write the data - # - out_file.write('begin %o %s\n' % ((mode&0777),name)) - data = in_file.read(45) - while len(data) > 0: - out_file.write(binascii.b2a_uu(data)) - data = in_file.read(45) - out_file.write(' \nend\n') - finally: - for f in opened_files: - f.close() - - -def decode(in_file, out_file=None, mode=None, quiet=0): - """Decode uuencoded file""" - # - # Open the input file, if needed. 
- # - opened_files = [] - if in_file == '-': - in_file = sys.stdin - elif isinstance(in_file, basestring): - in_file = open(in_file) - opened_files.append(in_file) - try: - # - # Read until a begin is encountered or we've exhausted the file - # - while True: - hdr = in_file.readline() - if not hdr: - raise Error('No valid begin line found in input file') - if not hdr.startswith('begin'): - continue - hdrfields = hdr.split(' ', 2) - if len(hdrfields) == 3 and hdrfields[0] == 'begin': - try: - int(hdrfields[1], 8) - break - except ValueError: - pass - if out_file is None: - out_file = hdrfields[2].rstrip() - if os.path.exists(out_file): - raise Error('Cannot overwrite existing file: %s' % out_file) - if mode is None: - mode = int(hdrfields[1], 8) - # - # Open the output file - # - if out_file == '-': - out_file = sys.stdout - elif isinstance(out_file, basestring): - fp = open(out_file, 'wb') - try: - os.path.chmod(out_file, mode) - except AttributeError: - pass - out_file = fp - opened_files.append(out_file) - # - # Main decoding loop - # - s = in_file.readline() - while s and s.strip() != 'end': - try: - data = binascii.a2b_uu(s) - except binascii.Error, v: - # Workaround for broken uuencoders by /Fredrik Lundh - nbytes = (((ord(s[0])-32) & 63) * 4 + 5) // 3 - data = binascii.a2b_uu(s[:nbytes]) - if not quiet: - sys.stderr.write("Warning: %s\n" % v) - out_file.write(data) - s = in_file.readline() - if not s: - raise Error('Truncated input file') - finally: - for f in opened_files: - f.close() - -def test(): - """uuencode/uudecode main program""" - - import optparse - parser = optparse.OptionParser(usage='usage: %prog [-d] [-t] [input [output]]') - parser.add_option('-d', '--decode', dest='decode', help='Decode (instead of encode)?', default=False, action='store_true') - parser.add_option('-t', '--text', dest='text', help='data is text, encoded format unix-compatible text?', default=False, action='store_true') - - (options, args) = parser.parse_args() - if len(args) 
> 2: - parser.error('incorrect number of arguments') - sys.exit(1) - - input = sys.stdin - output = sys.stdout - if len(args) > 0: - input = args[0] - if len(args) > 1: - output = args[1] - - if options.decode: - if options.text: - if isinstance(output, basestring): - output = open(output, 'w') - else: - print sys.argv[0], ': cannot do -t to stdout' - sys.exit(1) - decode(input, output) - else: - if options.text: - if isinstance(input, basestring): - input = open(input, 'r') - else: - print sys.argv[0], ': cannot do -t from stdin' - sys.exit(1) - encode(input, output) - -if __name__ == '__main__': - test() diff --git a/python/Lib/uuid.py b/python/Lib/uuid.py deleted file mode 100755 index 7432032df0..0000000000 --- a/python/Lib/uuid.py +++ /dev/null @@ -1,597 +0,0 @@ -r"""UUID objects (universally unique identifiers) according to RFC 4122. - -This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5 -UUIDs as specified in RFC 4122. - -If all you want is a unique ID, you should probably call uuid1() or uuid4(). -Note that uuid1() may compromise privacy since it creates a UUID containing -the computer's network address. uuid4() creates a random UUID. 
- -Typical usage: - - >>> import uuid - - # make a UUID based on the host ID and current time - >>> uuid.uuid1() - UUID('a8098c1a-f86e-11da-bd1a-00112444be1e') - - # make a UUID using an MD5 hash of a namespace UUID and a name - >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org') - UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e') - - # make a random UUID - >>> uuid.uuid4() - UUID('16fd2706-8baf-433b-82eb-8c7fada847da') - - # make a UUID using a SHA-1 hash of a namespace UUID and a name - >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org') - UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d') - - # make a UUID from a string of hex digits (braces and hyphens ignored) - >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}') - - # convert a UUID to a string of hex digits in standard form - >>> str(x) - '00010203-0405-0607-0809-0a0b0c0d0e0f' - - # get the raw 16 bytes of the UUID - >>> x.bytes - '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f' - - # make a UUID from a 16-byte string - >>> uuid.UUID(bytes=x.bytes) - UUID('00010203-0405-0607-0809-0a0b0c0d0e0f') -""" - -import os - -__author__ = 'Ka-Ping Yee ' - -RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [ - 'reserved for NCS compatibility', 'specified in RFC 4122', - 'reserved for Microsoft compatibility', 'reserved for future definition'] - -class UUID(object): - """Instances of the UUID class represent UUIDs as specified in RFC 4122. - UUID objects are immutable, hashable, and usable as dictionary keys. - Converting a UUID to a string with str() yields something in the form - '12345678-1234-1234-1234-123456789abc'. 
The UUID constructor accepts - five possible forms: a similar string of hexadecimal digits, or a tuple - of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and - 48-bit values respectively) as an argument named 'fields', or a string - of 16 bytes (with all the integer fields in big-endian order) as an - argument named 'bytes', or a string of 16 bytes (with the first three - fields in little-endian order) as an argument named 'bytes_le', or a - single 128-bit integer as an argument named 'int'. - - UUIDs have these read-only attributes: - - bytes the UUID as a 16-byte string (containing the six - integer fields in big-endian byte order) - - bytes_le the UUID as a 16-byte string (with time_low, time_mid, - and time_hi_version in little-endian byte order) - - fields a tuple of the six integer fields of the UUID, - which are also available as six individual attributes - and two derived attributes: - - time_low the first 32 bits of the UUID - time_mid the next 16 bits of the UUID - time_hi_version the next 16 bits of the UUID - clock_seq_hi_variant the next 8 bits of the UUID - clock_seq_low the next 8 bits of the UUID - node the last 48 bits of the UUID - - time the 60-bit timestamp - clock_seq the 14-bit sequence number - - hex the UUID as a 32-character hexadecimal string - - int the UUID as a 128-bit integer - - urn the UUID as a URN as specified in RFC 4122 - - variant the UUID variant (one of the constants RESERVED_NCS, - RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) - - version the UUID version number (1 through 5, meaningful only - when the variant is RFC_4122) - """ - - def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, - int=None, version=None): - r"""Create a UUID from either a string of 32 hexadecimal digits, - a string of 16 bytes as the 'bytes' argument, a string of 16 bytes - in little-endian order as the 'bytes_le' argument, a tuple of six - integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version, - 8-bit 
clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as - the 'fields' argument, or a single 128-bit integer as the 'int' - argument. When a string of hex digits is given, curly braces, - hyphens, and a URN prefix are all optional. For example, these - expressions all yield the same UUID: - - UUID('{12345678-1234-5678-1234-567812345678}') - UUID('12345678123456781234567812345678') - UUID('urn:uuid:12345678-1234-5678-1234-567812345678') - UUID(bytes='\x12\x34\x56\x78'*4) - UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' + - '\x12\x34\x56\x78\x12\x34\x56\x78') - UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678)) - UUID(int=0x12345678123456781234567812345678) - - Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must - be given. The 'version' argument is optional; if given, the resulting - UUID will have its variant and version set according to RFC 4122, - overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'. - """ - - if [hex, bytes, bytes_le, fields, int].count(None) != 4: - raise TypeError('need one of hex, bytes, bytes_le, fields, or int') - if hex is not None: - hex = hex.replace('urn:', '').replace('uuid:', '') - hex = hex.strip('{}').replace('-', '') - if len(hex) != 32: - raise ValueError('badly formed hexadecimal UUID string') - int = long(hex, 16) - if bytes_le is not None: - if len(bytes_le) != 16: - raise ValueError('bytes_le is not a 16-char string') - bytes = (bytes_le[3] + bytes_le[2] + bytes_le[1] + bytes_le[0] + - bytes_le[5] + bytes_le[4] + bytes_le[7] + bytes_le[6] + - bytes_le[8:]) - if bytes is not None: - if len(bytes) != 16: - raise ValueError('bytes is not a 16-char string') - int = long(('%02x'*16) % tuple(map(ord, bytes)), 16) - if fields is not None: - if len(fields) != 6: - raise ValueError('fields is not a 6-tuple') - (time_low, time_mid, time_hi_version, - clock_seq_hi_variant, clock_seq_low, node) = fields - if not 0 <= time_low < 1<<32L: - raise ValueError('field 1 out of range (need a 
32-bit value)') - if not 0 <= time_mid < 1<<16L: - raise ValueError('field 2 out of range (need a 16-bit value)') - if not 0 <= time_hi_version < 1<<16L: - raise ValueError('field 3 out of range (need a 16-bit value)') - if not 0 <= clock_seq_hi_variant < 1<<8L: - raise ValueError('field 4 out of range (need an 8-bit value)') - if not 0 <= clock_seq_low < 1<<8L: - raise ValueError('field 5 out of range (need an 8-bit value)') - if not 0 <= node < 1<<48L: - raise ValueError('field 6 out of range (need a 48-bit value)') - clock_seq = (clock_seq_hi_variant << 8L) | clock_seq_low - int = ((time_low << 96L) | (time_mid << 80L) | - (time_hi_version << 64L) | (clock_seq << 48L) | node) - if int is not None: - if not 0 <= int < 1<<128L: - raise ValueError('int is out of range (need a 128-bit value)') - if version is not None: - if not 1 <= version <= 5: - raise ValueError('illegal version number') - # Set the variant to RFC 4122. - int &= ~(0xc000 << 48L) - int |= 0x8000 << 48L - # Set the version number. 
- int &= ~(0xf000 << 64L) - int |= version << 76L - self.__dict__['int'] = int - - def __cmp__(self, other): - if isinstance(other, UUID): - return cmp(self.int, other.int) - return NotImplemented - - def __hash__(self): - return hash(self.int) - - def __int__(self): - return self.int - - def __repr__(self): - return 'UUID(%r)' % str(self) - - def __setattr__(self, name, value): - raise TypeError('UUID objects are immutable') - - def __str__(self): - hex = '%032x' % self.int - return '%s-%s-%s-%s-%s' % ( - hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:]) - - def get_bytes(self): - bytes = '' - for shift in range(0, 128, 8): - bytes = chr((self.int >> shift) & 0xff) + bytes - return bytes - - bytes = property(get_bytes) - - def get_bytes_le(self): - bytes = self.bytes - return (bytes[3] + bytes[2] + bytes[1] + bytes[0] + - bytes[5] + bytes[4] + bytes[7] + bytes[6] + bytes[8:]) - - bytes_le = property(get_bytes_le) - - def get_fields(self): - return (self.time_low, self.time_mid, self.time_hi_version, - self.clock_seq_hi_variant, self.clock_seq_low, self.node) - - fields = property(get_fields) - - def get_time_low(self): - return self.int >> 96L - - time_low = property(get_time_low) - - def get_time_mid(self): - return (self.int >> 80L) & 0xffff - - time_mid = property(get_time_mid) - - def get_time_hi_version(self): - return (self.int >> 64L) & 0xffff - - time_hi_version = property(get_time_hi_version) - - def get_clock_seq_hi_variant(self): - return (self.int >> 56L) & 0xff - - clock_seq_hi_variant = property(get_clock_seq_hi_variant) - - def get_clock_seq_low(self): - return (self.int >> 48L) & 0xff - - clock_seq_low = property(get_clock_seq_low) - - def get_time(self): - return (((self.time_hi_version & 0x0fffL) << 48L) | - (self.time_mid << 32L) | self.time_low) - - time = property(get_time) - - def get_clock_seq(self): - return (((self.clock_seq_hi_variant & 0x3fL) << 8L) | - self.clock_seq_low) - - clock_seq = property(get_clock_seq) - - def 
get_node(self): - return self.int & 0xffffffffffff - - node = property(get_node) - - def get_hex(self): - return '%032x' % self.int - - hex = property(get_hex) - - def get_urn(self): - return 'urn:uuid:' + str(self) - - urn = property(get_urn) - - def get_variant(self): - if not self.int & (0x8000 << 48L): - return RESERVED_NCS - elif not self.int & (0x4000 << 48L): - return RFC_4122 - elif not self.int & (0x2000 << 48L): - return RESERVED_MICROSOFT - else: - return RESERVED_FUTURE - - variant = property(get_variant) - - def get_version(self): - # The version bits are only meaningful for RFC 4122 UUIDs. - if self.variant == RFC_4122: - return int((self.int >> 76L) & 0xf) - - version = property(get_version) - -def _popen(command, args): - import os - path = os.environ.get("PATH", os.defpath).split(os.pathsep) - path.extend(('/sbin', '/usr/sbin')) - for dir in path: - executable = os.path.join(dir, command) - if (os.path.exists(executable) and - os.access(executable, os.F_OK | os.X_OK) and - not os.path.isdir(executable)): - break - else: - return None - # LC_ALL to ensure English output, 2>/dev/null to prevent output on - # stderr (Note: we don't have an example where the words we search for - # are actually localized, but in theory some system could do so.) - cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args) - return os.popen(cmd) - -def _find_mac(command, args, hw_identifiers, get_index): - try: - pipe = _popen(command, args) - if not pipe: - return - with pipe: - for line in pipe: - words = line.lower().rstrip().split() - for i in range(len(words)): - if words[i] in hw_identifiers: - try: - word = words[get_index(i)] - mac = int(word.replace(':', ''), 16) - if mac: - return mac - except (ValueError, IndexError): - # Virtual interfaces, such as those provided by - # VPNs, do not have a colon-delimited MAC address - # as expected, but a 16-byte HWAddr separated by - # dashes. 
These should be ignored in favor of a - # real MAC address - pass - except IOError: - pass - -def _ifconfig_getnode(): - """Get the hardware address on Unix by running ifconfig.""" - # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes. - for args in ('', '-a', '-av'): - mac = _find_mac('ifconfig', args, ['hwaddr', 'ether'], lambda i: i+1) - if mac: - return mac - -def _arp_getnode(): - """Get the hardware address on Unix by running arp.""" - import os, socket - try: - ip_addr = socket.gethostbyname(socket.gethostname()) - except EnvironmentError: - return None - - # Try getting the MAC addr from arp based on our IP address (Solaris). - return _find_mac('arp', '-an', [ip_addr], lambda i: -1) - -def _lanscan_getnode(): - """Get the hardware address on Unix by running lanscan.""" - # This might work on HP-UX. - return _find_mac('lanscan', '-ai', ['lan0'], lambda i: 0) - -def _netstat_getnode(): - """Get the hardware address on Unix by running netstat.""" - # This might work on AIX, Tru64 UNIX and presumably on IRIX. 
- try: - pipe = _popen('netstat', '-ia') - if not pipe: - return - with pipe: - words = pipe.readline().rstrip().split() - try: - i = words.index('Address') - except ValueError: - return - for line in pipe: - try: - words = line.rstrip().split() - word = words[i] - if len(word) == 17 and word.count(':') == 5: - mac = int(word.replace(':', ''), 16) - if mac: - return mac - except (ValueError, IndexError): - pass - except OSError: - pass - -def _ipconfig_getnode(): - """Get the hardware address on Windows by running ipconfig.exe.""" - import os, re - dirs = ['', r'c:\windows\system32', r'c:\winnt\system32'] - try: - import ctypes - buffer = ctypes.create_string_buffer(300) - ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300) - dirs.insert(0, buffer.value.decode('mbcs')) - except: - pass - for dir in dirs: - try: - pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all') - except IOError: - continue - with pipe: - for line in pipe: - value = line.split(':')[-1].strip().lower() - if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value): - return int(value.replace('-', ''), 16) - -def _netbios_getnode(): - """Get the hardware address on Windows using NetBIOS calls. 
- See http://support.microsoft.com/kb/118623 for details.""" - import win32wnet, netbios - ncb = netbios.NCB() - ncb.Command = netbios.NCBENUM - ncb.Buffer = adapters = netbios.LANA_ENUM() - adapters._pack() - if win32wnet.Netbios(ncb) != 0: - return - adapters._unpack() - for i in range(adapters.length): - ncb.Reset() - ncb.Command = netbios.NCBRESET - ncb.Lana_num = ord(adapters.lana[i]) - if win32wnet.Netbios(ncb) != 0: - continue - ncb.Reset() - ncb.Command = netbios.NCBASTAT - ncb.Lana_num = ord(adapters.lana[i]) - ncb.Callname = '*'.ljust(16) - ncb.Buffer = status = netbios.ADAPTER_STATUS() - if win32wnet.Netbios(ncb) != 0: - continue - status._unpack() - bytes = map(ord, status.adapter_address) - return ((bytes[0]<<40L) + (bytes[1]<<32L) + (bytes[2]<<24L) + - (bytes[3]<<16L) + (bytes[4]<<8L) + bytes[5]) - -# Thanks to Thomas Heller for ctypes and for his help with its use here. - -# If ctypes is available, use it to find system routines for UUID generation. -_uuid_generate_time = _UuidCreate = None -try: - import ctypes, ctypes.util - import sys - - # The uuid_generate_* routines are provided by libuuid on at least - # Linux and FreeBSD, and provided by libc on Mac OS X. - _libnames = ['uuid'] - if not sys.platform.startswith('win'): - _libnames.append('c') - for libname in _libnames: - try: - lib = ctypes.CDLL(ctypes.util.find_library(libname)) - except: - continue - if hasattr(lib, 'uuid_generate_time'): - _uuid_generate_time = lib.uuid_generate_time - break - del _libnames - - # The uuid_generate_* functions are broken on MacOS X 10.5, as noted - # in issue #8621 the function generates the same sequence of values - # in the parent process and all children created using fork (unless - # those children use exec as well). - # - # Assume that the uuid_generate functions are broken from 10.5 onward, - # the test can be adjusted when a later version is fixed. 
- if sys.platform == 'darwin': - import os - if int(os.uname()[2].split('.')[0]) >= 9: - _uuid_generate_time = None - - # On Windows prior to 2000, UuidCreate gives a UUID containing the - # hardware address. On Windows 2000 and later, UuidCreate makes a - # random UUID and UuidCreateSequential gives a UUID containing the - # hardware address. These routines are provided by the RPC runtime. - # NOTE: at least on Tim's WinXP Pro SP2 desktop box, while the last - # 6 bytes returned by UuidCreateSequential are fixed, they don't appear - # to bear any relationship to the MAC address of any network device - # on the box. - try: - lib = ctypes.windll.rpcrt4 - except: - lib = None - _UuidCreate = getattr(lib, 'UuidCreateSequential', - getattr(lib, 'UuidCreate', None)) -except: - pass - -def _unixdll_getnode(): - """Get the hardware address on Unix using ctypes.""" - _buffer = ctypes.create_string_buffer(16) - _uuid_generate_time(_buffer) - return UUID(bytes=_buffer.raw).node - -def _windll_getnode(): - """Get the hardware address on Windows using ctypes.""" - _buffer = ctypes.create_string_buffer(16) - if _UuidCreate(_buffer) == 0: - return UUID(bytes=_buffer.raw).node - -def _random_getnode(): - """Get a random node ID, with eighth bit set as suggested by RFC 4122.""" - import random - return random.randrange(0, 1<<48L) | 0x010000000000L - -_node = None - -def getnode(): - """Get the hardware address as a 48-bit positive integer. - - The first time this runs, it may launch a separate program, which could - be quite slow. If all attempts to obtain the hardware address fail, we - choose a random 48-bit number with its eighth bit set to 1 as recommended - in RFC 4122. 
- """ - - global _node - if _node is not None: - return _node - - import sys - if sys.platform == 'win32': - getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode] - else: - getters = [_unixdll_getnode, _ifconfig_getnode, _arp_getnode, - _lanscan_getnode, _netstat_getnode] - - for getter in getters + [_random_getnode]: - try: - _node = getter() - except: - continue - if _node is not None: - return _node - -_last_timestamp = None - -def uuid1(node=None, clock_seq=None): - """Generate a UUID from a host ID, sequence number, and the current time. - If 'node' is not given, getnode() is used to obtain the hardware - address. If 'clock_seq' is given, it is used as the sequence number; - otherwise a random 14-bit sequence number is chosen.""" - - # When the system provides a version-1 UUID generator, use it (but don't - # use UuidCreate here because its UUIDs don't conform to RFC 4122). - if _uuid_generate_time and node is clock_seq is None: - _buffer = ctypes.create_string_buffer(16) - _uuid_generate_time(_buffer) - return UUID(bytes=_buffer.raw) - - global _last_timestamp - import time - nanoseconds = int(time.time() * 1e9) - # 0x01b21dd213814000 is the number of 100-ns intervals between the - # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. 
- timestamp = int(nanoseconds//100) + 0x01b21dd213814000L - if _last_timestamp is not None and timestamp <= _last_timestamp: - timestamp = _last_timestamp + 1 - _last_timestamp = timestamp - if clock_seq is None: - import random - clock_seq = random.randrange(1<<14L) # instead of stable storage - time_low = timestamp & 0xffffffffL - time_mid = (timestamp >> 32L) & 0xffffL - time_hi_version = (timestamp >> 48L) & 0x0fffL - clock_seq_low = clock_seq & 0xffL - clock_seq_hi_variant = (clock_seq >> 8L) & 0x3fL - if node is None: - node = getnode() - return UUID(fields=(time_low, time_mid, time_hi_version, - clock_seq_hi_variant, clock_seq_low, node), version=1) - -def uuid3(namespace, name): - """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" - from hashlib import md5 - hash = md5(namespace.bytes + name).digest() - return UUID(bytes=hash[:16], version=3) - -def uuid4(): - """Generate a random UUID.""" - return UUID(bytes=os.urandom(16), version=4) - -def uuid5(namespace, name): - """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" - from hashlib import sha1 - hash = sha1(namespace.bytes + name).digest() - return UUID(bytes=hash[:16], version=5) - -# The following standard UUIDs are for use with uuid3() or uuid5(). - -NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8') -NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8') -NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8') -NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8') diff --git a/python/Lib/warnings.py b/python/Lib/warnings.py deleted file mode 100755 index b0d53aa032..0000000000 --- a/python/Lib/warnings.py +++ /dev/null @@ -1,422 +0,0 @@ -"""Python part of the warnings subsystem.""" - -# Note: function level imports should *not* be used -# in this module as it may cause import lock deadlock. -# See bug 683658. 
-import linecache -import sys -import types - -__all__ = ["warn", "warn_explicit", "showwarning", - "formatwarning", "filterwarnings", "simplefilter", - "resetwarnings", "catch_warnings"] - - -def warnpy3k(message, category=None, stacklevel=1): - """Issue a deprecation warning for Python 3.x related changes. - - Warnings are omitted unless Python is started with the -3 option. - """ - if sys.py3kwarning: - if category is None: - category = DeprecationWarning - warn(message, category, stacklevel+1) - -def _show_warning(message, category, filename, lineno, file=None, line=None): - """Hook to write a warning to a file; replace if you like.""" - if file is None: - file = sys.stderr - if file is None: - # sys.stderr is None - warnings get lost - return - try: - file.write(formatwarning(message, category, filename, lineno, line)) - except (IOError, UnicodeError): - pass # the file (probably stderr) is invalid - this warning gets lost. -# Keep a working version around in case the deprecation of the old API is -# triggered. -showwarning = _show_warning - -def formatwarning(message, category, filename, lineno, line=None): - """Function to format a warning the standard way.""" - try: - unicodetype = unicode - except NameError: - unicodetype = () - try: - message = str(message) - except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) - line = linecache.getline(filename, lineno) if line is None else line - if line: - line = line.strip() - if isinstance(s, unicodetype) and isinstance(line, str): - line = unicode(line, 'latin1') - s += " %s\n" % line - if isinstance(s, unicodetype) and isinstance(filename, str): - enc = sys.getfilesystemencoding() - if enc: - try: - filename = unicode(filename, enc) - except UnicodeDecodeError: - pass - s = "%s:%s" % (filename, s) - return s - -def filterwarnings(action, message="", category=Warning, module="", lineno=0, - append=0): - """Insert an entry into the list of warnings filters (at the front). 
- - 'action' -- one of "error", "ignore", "always", "default", "module", - or "once" - 'message' -- a regex that the warning message must match - 'category' -- a class that the warning must be a subclass of - 'module' -- a regex that the module name must match - 'lineno' -- an integer line number, 0 matches all warnings - 'append' -- if true, append to the list of filters - """ - import re - assert action in ("error", "ignore", "always", "default", "module", - "once"), "invalid action: %r" % (action,) - assert isinstance(message, basestring), "message must be a string" - assert isinstance(category, (type, types.ClassType)), \ - "category must be a class" - assert issubclass(category, Warning), "category must be a Warning subclass" - assert isinstance(module, basestring), "module must be a string" - assert isinstance(lineno, int) and lineno >= 0, \ - "lineno must be an int >= 0" - item = (action, re.compile(message, re.I), category, - re.compile(module), lineno) - if append: - filters.append(item) - else: - filters.insert(0, item) - -def simplefilter(action, category=Warning, lineno=0, append=0): - """Insert a simple entry into the list of warnings filters (at the front). - - A simple filter matches all modules and messages. 
- 'action' -- one of "error", "ignore", "always", "default", "module", - or "once" - 'category' -- a class that the warning must be a subclass of - 'lineno' -- an integer line number, 0 matches all warnings - 'append' -- if true, append to the list of filters - """ - assert action in ("error", "ignore", "always", "default", "module", - "once"), "invalid action: %r" % (action,) - assert isinstance(lineno, int) and lineno >= 0, \ - "lineno must be an int >= 0" - item = (action, None, category, None, lineno) - if append: - filters.append(item) - else: - filters.insert(0, item) - -def resetwarnings(): - """Clear the list of warning filters, so that no filters are active.""" - filters[:] = [] - -class _OptionError(Exception): - """Exception used by option processing helpers.""" - pass - -# Helper to process -W options passed via sys.warnoptions -def _processoptions(args): - for arg in args: - try: - _setoption(arg) - except _OptionError, msg: - print >>sys.stderr, "Invalid -W option ignored:", msg - -# Helper for _processoptions() -def _setoption(arg): - import re - parts = arg.split(':') - if len(parts) > 5: - raise _OptionError("too many fields (max 5): %r" % (arg,)) - while len(parts) < 5: - parts.append('') - action, message, category, module, lineno = [s.strip() - for s in parts] - action = _getaction(action) - message = re.escape(message) - category = _getcategory(category) - module = re.escape(module) - if module: - module = module + '$' - if lineno: - try: - lineno = int(lineno) - if lineno < 0: - raise ValueError - except (ValueError, OverflowError): - raise _OptionError("invalid lineno %r" % (lineno,)) - else: - lineno = 0 - filterwarnings(action, message, category, module, lineno) - -# Helper for _setoption() -def _getaction(action): - if not action: - return "default" - if action == "all": return "always" # Alias - for a in ('default', 'always', 'ignore', 'module', 'once', 'error'): - if a.startswith(action): - return a - raise _OptionError("invalid action: 
%r" % (action,)) - -# Helper for _setoption() -def _getcategory(category): - import re - if not category: - return Warning - if re.match("^[a-zA-Z0-9_]+$", category): - try: - cat = eval(category) - except NameError: - raise _OptionError("unknown warning category: %r" % (category,)) - else: - i = category.rfind(".") - module = category[:i] - klass = category[i+1:] - try: - m = __import__(module, None, None, [klass]) - except ImportError: - raise _OptionError("invalid module name: %r" % (module,)) - try: - cat = getattr(m, klass) - except AttributeError: - raise _OptionError("unknown warning category: %r" % (category,)) - if not issubclass(cat, Warning): - raise _OptionError("invalid warning category: %r" % (category,)) - return cat - - -# Code typically replaced by _warnings -def warn(message, category=None, stacklevel=1): - """Issue a warning, or maybe ignore it or raise an exception.""" - # Check if message is already a Warning object - if isinstance(message, Warning): - category = message.__class__ - # Check category argument - if category is None: - category = UserWarning - assert issubclass(category, Warning) - # Get context information - try: - caller = sys._getframe(stacklevel) - except ValueError: - globals = sys.__dict__ - lineno = 1 - else: - globals = caller.f_globals - lineno = caller.f_lineno - if '__name__' in globals: - module = globals['__name__'] - else: - module = "" - filename = globals.get('__file__') - if filename: - fnl = filename.lower() - if fnl.endswith((".pyc", ".pyo")): - filename = filename[:-1] - else: - if module == "__main__": - try: - filename = sys.argv[0] - except AttributeError: - # embedded interpreters don't have sys.argv, see bug #839151 - filename = '__main__' - if not filename: - filename = module - registry = globals.setdefault("__warningregistry__", {}) - warn_explicit(message, category, filename, lineno, module, registry, - globals) - -def warn_explicit(message, category, filename, lineno, - module=None, registry=None, 
module_globals=None): - lineno = int(lineno) - if module is None: - module = filename or "" - if module[-3:].lower() == ".py": - module = module[:-3] # XXX What about leading pathname? - if registry is None: - registry = {} - if isinstance(message, Warning): - text = str(message) - category = message.__class__ - else: - text = message - message = category(message) - key = (text, category, lineno) - # Quick test for common case - if registry.get(key): - return - # Search the filters - for item in filters: - action, msg, cat, mod, ln = item - if ((msg is None or msg.match(text)) and - issubclass(category, cat) and - (mod is None or mod.match(module)) and - (ln == 0 or lineno == ln)): - break - else: - action = defaultaction - # Early exit actions - if action == "ignore": - registry[key] = 1 - return - - # Prime the linecache for formatting, in case the - # "file" is actually in a zipfile or something. - linecache.getlines(filename, module_globals) - - if action == "error": - raise message - # Other actions - if action == "once": - registry[key] = 1 - oncekey = (text, category) - if onceregistry.get(oncekey): - return - onceregistry[oncekey] = 1 - elif action == "always": - pass - elif action == "module": - registry[key] = 1 - altkey = (text, category, 0) - if registry.get(altkey): - return - registry[altkey] = 1 - elif action == "default": - registry[key] = 1 - else: - # Unrecognized actions are errors - raise RuntimeError( - "Unrecognized action (%r) in warnings.filters:\n %s" % - (action, item)) - # Print message and context - showwarning(message, category, filename, lineno) - - -class WarningMessage(object): - - """Holds the result of a single showwarning() call.""" - - _WARNING_DETAILS = ("message", "category", "filename", "lineno", "file", - "line") - - def __init__(self, message, category, filename, lineno, file=None, - line=None): - local_values = locals() - for attr in self._WARNING_DETAILS: - setattr(self, attr, local_values[attr]) - self._category_name = 
category.__name__ if category else None - - def __str__(self): - return ("{message : %r, category : %r, filename : %r, lineno : %s, " - "line : %r}" % (self.message, self._category_name, - self.filename, self.lineno, self.line)) - - -class catch_warnings(object): - - """A context manager that copies and restores the warnings filter upon - exiting the context. - - The 'record' argument specifies whether warnings should be captured by a - custom implementation of warnings.showwarning() and be appended to a list - returned by the context manager. Otherwise None is returned by the context - manager. The objects appended to the list are arguments whose attributes - mirror the arguments to showwarning(). - - The 'module' argument is to specify an alternative module to the module - named 'warnings' and imported under that name. This argument is only useful - when testing the warnings module itself. - - """ - - def __init__(self, record=False, module=None): - """Specify whether to record warnings and if an alternative module - should be used other than sys.modules['warnings']. - - For compatibility with Python 3.0, please consider all arguments to be - keyword-only. 
- - """ - self._record = record - self._module = sys.modules['warnings'] if module is None else module - self._entered = False - - def __repr__(self): - args = [] - if self._record: - args.append("record=True") - if self._module is not sys.modules['warnings']: - args.append("module=%r" % self._module) - name = type(self).__name__ - return "%s(%s)" % (name, ", ".join(args)) - - def __enter__(self): - if self._entered: - raise RuntimeError("Cannot enter %r twice" % self) - self._entered = True - self._filters = self._module.filters - self._module.filters = self._filters[:] - self._showwarning = self._module.showwarning - if self._record: - log = [] - def showwarning(*args, **kwargs): - log.append(WarningMessage(*args, **kwargs)) - self._module.showwarning = showwarning - return log - else: - return None - - def __exit__(self, *exc_info): - if not self._entered: - raise RuntimeError("Cannot exit %r without entering first" % self) - self._module.filters = self._filters - self._module.showwarning = self._showwarning - - -# filters contains a sequence of filter 5-tuples -# The components of the 5-tuple are: -# - an action: error, ignore, always, default, module, or once -# - a compiled regex that must match the warning message -# - a class representing the warning category -# - a compiled regex that must match the module that is being warned -# - a line number for the line being warning, or 0 to mean any line -# If either if the compiled regexs are None, match anything. -_warnings_defaults = False -try: - from _warnings import (filters, default_action, once_registry, - warn, warn_explicit) - defaultaction = default_action - onceregistry = once_registry - _warnings_defaults = True -except ImportError: - filters = [] - defaultaction = "default" - onceregistry = {} - - -# Module initialization -_processoptions(sys.warnoptions) -if not _warnings_defaults: - silence = [ImportWarning, PendingDeprecationWarning] - # Don't silence DeprecationWarning if -3 or -Q was used. 
- if not sys.py3kwarning and not sys.flags.division_warning: - silence.append(DeprecationWarning) - for cls in silence: - simplefilter("ignore", category=cls) - bytes_warning = sys.flags.bytes_warning - if bytes_warning > 1: - bytes_action = "error" - elif bytes_warning: - bytes_action = "default" - else: - bytes_action = "ignore" - simplefilter(bytes_action, category=BytesWarning, append=1) -del _warnings_defaults diff --git a/python/Lib/wave.py b/python/Lib/wave.py deleted file mode 100755 index 9e554cb9cc..0000000000 --- a/python/Lib/wave.py +++ /dev/null @@ -1,517 +0,0 @@ -"""Stuff to parse WAVE files. - -Usage. - -Reading WAVE files: - f = wave.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -When the setpos() and rewind() methods are not used, the seek() -method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' for linear samples) - getcompname() -- returns human-readable version of - compression type ('not compressed' linear samples) - getparams() -- returns a tuple consisting of all of the - above in the above order - getmarkers() -- returns None (for compatibility with the - aifc module) - getmark(id) -- raises an error since the mark does not - exist (for compatibility with the aifc module) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell() and the position given to setpos() -are compatible and have nothing to 
do with the actual position in the -file. -The close() method is called automatically when the class instance -is destroyed. - -Writing WAVE files: - f = wave.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple) - -- set all parameters at once - tell() -- return current position in output file - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes('') or -close() to patch up the sizes in the header. -The close() method is called automatically when the class instance -is destroyed. 
-""" - -import __builtin__ - -__all__ = ["open", "openfp", "Error"] - -class Error(Exception): - pass - -WAVE_FORMAT_PCM = 0x0001 - -_array_fmts = None, 'b', 'h', None, 'i' - -import struct -import sys -from chunk import Chunk - -def _byteswap3(data): - ba = bytearray(data) - ba[::3] = data[2::3] - ba[2::3] = data[::3] - return bytes(ba) - -class Wave_read: - """Variables used in this class: - - These variables are available to the user though appropriate - methods of this class: - _file -- the open file with methods read(), close(), and seek() - set through the __init__() method - _nchannels -- the number of audio channels - available through the getnchannels() method - _nframes -- the number of audio frames - available through the getnframes() method - _sampwidth -- the number of bytes per audio sample - available through the getsampwidth() method - _framerate -- the sampling frequency - available through the getframerate() method - _comptype -- the AIFF-C compression type ('NONE' if AIFF) - available through the getcomptype() method - _compname -- the human-readable AIFF-C compression type - available through the getcomptype() method - _soundpos -- the position in the audio stream - available through the tell() method, set through the - setpos() method - - These variables are used internally only: - _fmt_chunk_read -- 1 iff the FMT chunk has been read - _data_seek_needed -- 1 iff positioned correctly in audio - file for readframes() - _data_chunk -- instantiation of a chunk class for the DATA chunk - _framesize -- size of one frame in the file - """ - - def initfp(self, file): - self._convert = None - self._soundpos = 0 - self._file = Chunk(file, bigendian = 0) - if self._file.getname() != 'RIFF': - raise Error, 'file does not start with RIFF id' - if self._file.read(4) != 'WAVE': - raise Error, 'not a WAVE file' - self._fmt_chunk_read = 0 - self._data_chunk = None - while 1: - self._data_seek_needed = 1 - try: - chunk = Chunk(self._file, bigendian = 0) - except 
EOFError: - break - chunkname = chunk.getname() - if chunkname == 'fmt ': - self._read_fmt_chunk(chunk) - self._fmt_chunk_read = 1 - elif chunkname == 'data': - if not self._fmt_chunk_read: - raise Error, 'data chunk before fmt chunk' - self._data_chunk = chunk - self._nframes = chunk.chunksize // self._framesize - self._data_seek_needed = 0 - break - chunk.skip() - if not self._fmt_chunk_read or not self._data_chunk: - raise Error, 'fmt chunk and/or data chunk missing' - - def __init__(self, f): - self._i_opened_the_file = None - if isinstance(f, basestring): - f = __builtin__.open(f, 'rb') - self._i_opened_the_file = f - # else, assume it is an open file object already - try: - self.initfp(f) - except: - if self._i_opened_the_file: - f.close() - raise - - def __del__(self): - self.close() - # - # User visible methods. - # - def getfp(self): - return self._file - - def rewind(self): - self._data_seek_needed = 1 - self._soundpos = 0 - - def close(self): - self._file = None - file = self._i_opened_the_file - if file: - self._i_opened_the_file = None - file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - - def getparams(self): - return self.getnchannels(), self.getsampwidth(), \ - self.getframerate(), self.getnframes(), \ - self.getcomptype(), self.getcompname() - - def getmarkers(self): - return None - - def getmark(self, id): - raise Error, 'no marks' - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error, 'position not in range' - self._soundpos = pos - self._data_seek_needed = 1 - - def readframes(self, nframes): - if self._data_seek_needed: - self._data_chunk.seek(0, 0) - pos = self._soundpos * self._framesize - if pos: - 
self._data_chunk.seek(pos, 0) - self._data_seek_needed = 0 - if nframes == 0: - return '' - if self._sampwidth in (2, 4) and sys.byteorder == 'big': - # unfortunately the fromfile() method does not take - # something that only looks like a file object, so - # we have to reach into the innards of the chunk object - import array - chunk = self._data_chunk - data = array.array(_array_fmts[self._sampwidth]) - assert data.itemsize == self._sampwidth - nitems = nframes * self._nchannels - if nitems * self._sampwidth > chunk.chunksize - chunk.size_read: - nitems = (chunk.chunksize - chunk.size_read) // self._sampwidth - data.fromfile(chunk.file.file, nitems) - # "tell" data chunk how much was read - chunk.size_read = chunk.size_read + nitems * self._sampwidth - # do the same for the outermost chunk - chunk = chunk.file - chunk.size_read = chunk.size_read + nitems * self._sampwidth - data.byteswap() - data = data.tostring() - else: - data = self._data_chunk.read(nframes * self._framesize) - if self._sampwidth == 3 and sys.byteorder == 'big': - data = _byteswap3(data) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels * self._sampwidth) - return data - - # - # Internal methods. 
- # - - def _read_fmt_chunk(self, chunk): - wFormatTag, self._nchannels, self._framerate, dwAvgBytesPerSec, wBlockAlign = struct.unpack(' 4: - raise Error, 'bad sample width' - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error, 'sample width not set' - return self._sampwidth - - def setframerate(self, framerate): - if self._datawritten: - raise Error, 'cannot change parameters after starting to write' - if framerate <= 0: - raise Error, 'bad frame rate' - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error, 'frame rate not set' - return self._framerate - - def setnframes(self, nframes): - if self._datawritten: - raise Error, 'cannot change parameters after starting to write' - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._datawritten: - raise Error, 'cannot change parameters after starting to write' - if comptype not in ('NONE',): - raise Error, 'unsupported compression type' - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - if self._datawritten: - raise Error, 'cannot change parameters after starting to write' - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error, 'not all parameters set' - return self._nchannels, self._sampwidth, self._framerate, \ - self._nframes, self._comptype, self._compname - - def setmark(self, id, pos, name): - raise Error, 'setmark() not supported' - - def getmark(self, id): - raise Error, 'no marks' - - def getmarkers(self): - return 
None - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - if self._sampwidth in (2, 4) and sys.byteorder == 'big': - import array - a = array.array(_array_fmts[self._sampwidth]) - a.fromstring(data) - data = a - assert data.itemsize == self._sampwidth - data.byteswap() - data.tofile(self._file) - self._datawritten = self._datawritten + len(data) * self._sampwidth - else: - if self._sampwidth == 3 and sys.byteorder == 'big': - data = _byteswap3(data) - self._file.write(data) - self._datawritten = self._datawritten + len(data) - self._nframeswritten = self._nframeswritten + nframes - - def writeframes(self, data): - self.writeframesraw(data) - if self._datalength != self._datawritten: - self._patchheader() - - def close(self): - try: - if self._file: - self._ensure_header_written(0) - if self._datalength != self._datawritten: - self._patchheader() - self._file.flush() - finally: - self._file = None - file = self._i_opened_the_file - if file: - self._i_opened_the_file = None - file.close() - - # - # Internal methods. 
- # - - def _ensure_header_written(self, datasize): - if not self._headerwritten: - if not self._nchannels: - raise Error, '# channels not specified' - if not self._sampwidth: - raise Error, 'sample width not specified' - if not self._framerate: - raise Error, 'sampling rate not specified' - self._write_header(datasize) - - def _write_header(self, initlength): - assert not self._headerwritten - self._file.write('RIFF') - if not self._nframes: - self._nframes = initlength / (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - self._form_length_pos = self._file.tell() - self._file.write(struct.pack(' 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - def remove(wr, selfref=ref(self)): - self = selfref() - if self is not None: - if self._iterating: - self._pending_removals.append(wr.key) - else: - del self.data[wr.key] - self._remove = remove - # A list of keys to be removed - self._pending_removals = [] - self._iterating = set() - UserDict.UserDict.__init__(self, *args, **kw) - - def _commit_removals(self): - l = self._pending_removals - d = self.data - # We shouldn't encounter any KeyError, because this method should - # always be called *before* mutating the dict. 
- while l: - del d[l.pop()] - - def __getitem__(self, key): - o = self.data[key]() - if o is None: - raise KeyError, key - else: - return o - - def __delitem__(self, key): - if self._pending_removals: - self._commit_removals() - del self.data[key] - - def __contains__(self, key): - try: - o = self.data[key]() - except KeyError: - return False - return o is not None - - def has_key(self, key): - try: - o = self.data[key]() - except KeyError: - return False - return o is not None - - def __repr__(self): - return "" % id(self) - - def __setitem__(self, key, value): - if self._pending_removals: - self._commit_removals() - self.data[key] = KeyedRef(value, self._remove, key) - - def clear(self): - if self._pending_removals: - self._commit_removals() - self.data.clear() - - def copy(self): - new = WeakValueDictionary() - for key, wr in self.data.items(): - o = wr() - if o is not None: - new[key] = o - return new - - __copy__ = copy - - def __deepcopy__(self, memo): - from copy import deepcopy - new = self.__class__() - for key, wr in self.data.items(): - o = wr() - if o is not None: - new[deepcopy(key, memo)] = o - return new - - def get(self, key, default=None): - try: - wr = self.data[key] - except KeyError: - return default - else: - o = wr() - if o is None: - # This should only happen - return default - else: - return o - - def items(self): - L = [] - for key, wr in self.data.items(): - o = wr() - if o is not None: - L.append((key, o)) - return L - - def iteritems(self): - with _IterationGuard(self): - for wr in self.data.itervalues(): - value = wr() - if value is not None: - yield wr.key, value - - def iterkeys(self): - with _IterationGuard(self): - for k in self.data.iterkeys(): - yield k - - __iter__ = iterkeys - - def itervaluerefs(self): - """Return an iterator that yields the weak references to the values. 
- - The references are not guaranteed to be 'live' at the time - they are used, so the result of calling the references needs - to be checked before being used. This can be used to avoid - creating references that will cause the garbage collector to - keep the values around longer than needed. - - """ - with _IterationGuard(self): - for wr in self.data.itervalues(): - yield wr - - def itervalues(self): - with _IterationGuard(self): - for wr in self.data.itervalues(): - obj = wr() - if obj is not None: - yield obj - - def popitem(self): - if self._pending_removals: - self._commit_removals() - while 1: - key, wr = self.data.popitem() - o = wr() - if o is not None: - return key, o - - def pop(self, key, *args): - if self._pending_removals: - self._commit_removals() - try: - o = self.data.pop(key)() - except KeyError: - if args: - return args[0] - raise - if o is None: - raise KeyError, key - else: - return o - - def setdefault(self, key, default=None): - try: - wr = self.data[key] - except KeyError: - if self._pending_removals: - self._commit_removals() - self.data[key] = KeyedRef(default, self._remove, key) - return default - else: - return wr() - - def update(*args, **kwargs): - if not args: - raise TypeError("descriptor 'update' of 'WeakValueDictionary' " - "object needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - dict = args[0] if args else None - if self._pending_removals: - self._commit_removals() - d = self.data - if dict is not None: - if not hasattr(dict, "items"): - dict = type({})(dict) - for key, o in dict.items(): - d[key] = KeyedRef(o, self._remove, key) - if len(kwargs): - self.update(kwargs) - - def valuerefs(self): - """Return a list of weak references to the values. - - The references are not guaranteed to be 'live' at the time - they are used, so the result of calling the references needs - to be checked before being used. 
This can be used to avoid - creating references that will cause the garbage collector to - keep the values around longer than needed. - - """ - return self.data.values() - - def values(self): - L = [] - for wr in self.data.values(): - o = wr() - if o is not None: - L.append(o) - return L - - -class KeyedRef(ref): - """Specialized reference that includes a key corresponding to the value. - - This is used in the WeakValueDictionary to avoid having to create - a function object for each key stored in the mapping. A shared - callback object can use the 'key' attribute of a KeyedRef instead - of getting a reference to the key from an enclosing scope. - - """ - - __slots__ = "key", - - def __new__(type, ob, callback, key): - self = ref.__new__(type, ob, callback) - self.key = key - return self - - def __init__(self, ob, callback, key): - super(KeyedRef, self).__init__(ob, callback) - - -class WeakKeyDictionary(UserDict.UserDict): - """ Mapping class that references keys weakly. - - Entries in the dictionary will be discarded when there is no - longer a strong reference to the key. This can be used to - associate additional data with an object owned by other parts of - an application without adding attributes to those objects. This - can be especially useful with objects that override attribute - accesses. - """ - - def __init__(self, dict=None): - self.data = {} - def remove(k, selfref=ref(self)): - self = selfref() - if self is not None: - if self._iterating: - self._pending_removals.append(k) - else: - del self.data[k] - self._remove = remove - # A list of dead weakrefs (keys to be removed) - self._pending_removals = [] - self._iterating = set() - if dict is not None: - self.update(dict) - - def _commit_removals(self): - # NOTE: We don't need to call this method before mutating the dict, - # because a dead weakref never compares equal to a live weakref, - # even if they happened to refer to equal objects. - # However, it means keys may already have been removed. 
- l = self._pending_removals - d = self.data - while l: - try: - del d[l.pop()] - except KeyError: - pass - - def __delitem__(self, key): - del self.data[ref(key)] - - def __getitem__(self, key): - return self.data[ref(key)] - - def __repr__(self): - return "" % id(self) - - def __setitem__(self, key, value): - self.data[ref(key, self._remove)] = value - - def copy(self): - new = WeakKeyDictionary() - for key, value in self.data.items(): - o = key() - if o is not None: - new[o] = value - return new - - __copy__ = copy - - def __deepcopy__(self, memo): - from copy import deepcopy - new = self.__class__() - for key, value in self.data.items(): - o = key() - if o is not None: - new[o] = deepcopy(value, memo) - return new - - def get(self, key, default=None): - return self.data.get(ref(key),default) - - def has_key(self, key): - try: - wr = ref(key) - except TypeError: - return 0 - return wr in self.data - - def __contains__(self, key): - try: - wr = ref(key) - except TypeError: - return 0 - return wr in self.data - - def items(self): - L = [] - for key, value in self.data.items(): - o = key() - if o is not None: - L.append((o, value)) - return L - - def iteritems(self): - with _IterationGuard(self): - for wr, value in self.data.iteritems(): - key = wr() - if key is not None: - yield key, value - - def iterkeyrefs(self): - """Return an iterator that yields the weak references to the keys. - - The references are not guaranteed to be 'live' at the time - they are used, so the result of calling the references needs - to be checked before being used. This can be used to avoid - creating references that will cause the garbage collector to - keep the keys around longer than needed. 
- - """ - with _IterationGuard(self): - for wr in self.data.iterkeys(): - yield wr - - def iterkeys(self): - with _IterationGuard(self): - for wr in self.data.iterkeys(): - obj = wr() - if obj is not None: - yield obj - - __iter__ = iterkeys - - def itervalues(self): - with _IterationGuard(self): - for value in self.data.itervalues(): - yield value - - def keyrefs(self): - """Return a list of weak references to the keys. - - The references are not guaranteed to be 'live' at the time - they are used, so the result of calling the references needs - to be checked before being used. This can be used to avoid - creating references that will cause the garbage collector to - keep the keys around longer than needed. - - """ - return self.data.keys() - - def keys(self): - L = [] - for wr in self.data.keys(): - o = wr() - if o is not None: - L.append(o) - return L - - def popitem(self): - while 1: - key, value = self.data.popitem() - o = key() - if o is not None: - return o, value - - def pop(self, key, *args): - return self.data.pop(ref(key), *args) - - def setdefault(self, key, default=None): - return self.data.setdefault(ref(key, self._remove),default) - - def update(self, dict=None, **kwargs): - d = self.data - if dict is not None: - if not hasattr(dict, "items"): - dict = type({})(dict) - for key, value in dict.items(): - d[ref(key, self._remove)] = value - if len(kwargs): - self.update(kwargs) diff --git a/python/Lib/webbrowser.py b/python/Lib/webbrowser.py deleted file mode 100755 index cf9eeb4a82..0000000000 --- a/python/Lib/webbrowser.py +++ /dev/null @@ -1,705 +0,0 @@ -#! /usr/bin/env python -"""Interfaces for launching and remotely controlling Web browsers.""" -# Maintained by Georg Brandl. 
- -import os -import shlex -import sys -import stat -import subprocess -import time - -__all__ = ["Error", "open", "open_new", "open_new_tab", "get", "register"] - -class Error(Exception): - pass - -_browsers = {} # Dictionary of available browser controllers -_tryorder = [] # Preference order of available browsers - -def register(name, klass, instance=None, update_tryorder=1): - """Register a browser connector and, optionally, connection.""" - _browsers[name.lower()] = [klass, instance] - if update_tryorder > 0: - _tryorder.append(name) - elif update_tryorder < 0: - _tryorder.insert(0, name) - -def get(using=None): - """Return a browser launcher instance appropriate for the environment.""" - if using is not None: - alternatives = [using] - else: - alternatives = _tryorder - for browser in alternatives: - if '%s' in browser: - # User gave us a command line, split it into name and args - browser = shlex.split(browser) - if browser[-1] == '&': - return BackgroundBrowser(browser[:-1]) - else: - return GenericBrowser(browser) - else: - # User gave us a browser name or path. - try: - command = _browsers[browser.lower()] - except KeyError: - command = _synthesize(browser) - if command[1] is not None: - return command[1] - elif command[0] is not None: - return command[0]() - raise Error("could not locate runnable browser") - -# Please note: the following definition hides a builtin function. -# It is recommended one does "import webbrowser" and uses webbrowser.open(url) -# instead of "from webbrowser import *". - -def open(url, new=0, autoraise=True): - for name in _tryorder: - browser = get(name) - if browser.open(url, new, autoraise): - return True - return False - -def open_new(url): - return open(url, 1) - -def open_new_tab(url): - return open(url, 2) - - -def _synthesize(browser, update_tryorder=1): - """Attempt to synthesize a controller base on existing controllers. 
- - This is useful to create a controller when a user specifies a path to - an entry in the BROWSER environment variable -- we can copy a general - controller to operate using a specific installation of the desired - browser in this way. - - If we can't create a controller in this way, or if there is no - executable for the requested browser, return [None, None]. - - """ - cmd = browser.split()[0] - if not _iscommand(cmd): - return [None, None] - name = os.path.basename(cmd) - try: - command = _browsers[name.lower()] - except KeyError: - return [None, None] - # now attempt to clone to fit the new name: - controller = command[1] - if controller and name.lower() == controller.basename: - import copy - controller = copy.copy(controller) - controller.name = browser - controller.basename = os.path.basename(browser) - register(browser, None, controller, update_tryorder) - return [None, controller] - return [None, None] - - -if sys.platform[:3] == "win": - def _isexecutable(cmd): - cmd = cmd.lower() - if os.path.isfile(cmd) and cmd.endswith((".exe", ".bat")): - return True - for ext in ".exe", ".bat": - if os.path.isfile(cmd + ext): - return True - return False -else: - def _isexecutable(cmd): - if os.path.isfile(cmd): - mode = os.stat(cmd)[stat.ST_MODE] - if mode & stat.S_IXUSR or mode & stat.S_IXGRP or mode & stat.S_IXOTH: - return True - return False - -def _iscommand(cmd): - """Return True if cmd is executable or can be found on the executable - search path.""" - if _isexecutable(cmd): - return True - path = os.environ.get("PATH") - if not path: - return False - for d in path.split(os.pathsep): - exe = os.path.join(d, cmd) - if _isexecutable(exe): - return True - return False - - -# General parent classes - -class BaseBrowser(object): - """Parent class for all browsers. 
Do not use directly.""" - - args = ['%s'] - - def __init__(self, name=""): - self.name = name - self.basename = name - - def open(self, url, new=0, autoraise=True): - raise NotImplementedError - - def open_new(self, url): - return self.open(url, 1) - - def open_new_tab(self, url): - return self.open(url, 2) - - -class GenericBrowser(BaseBrowser): - """Class for all browsers started with a command - and without remote functionality.""" - - def __init__(self, name): - if isinstance(name, basestring): - self.name = name - self.args = ["%s"] - else: - # name should be a list with arguments - self.name = name[0] - self.args = name[1:] - self.basename = os.path.basename(self.name) - - def open(self, url, new=0, autoraise=True): - cmdline = [self.name] + [arg.replace("%s", url) - for arg in self.args] - try: - if sys.platform[:3] == 'win': - p = subprocess.Popen(cmdline) - else: - p = subprocess.Popen(cmdline, close_fds=True) - return not p.wait() - except OSError: - return False - - -class BackgroundBrowser(GenericBrowser): - """Class for all browsers which are to be started in the - background.""" - - def open(self, url, new=0, autoraise=True): - cmdline = [self.name] + [arg.replace("%s", url) - for arg in self.args] - try: - if sys.platform[:3] == 'win': - p = subprocess.Popen(cmdline) - else: - setsid = getattr(os, 'setsid', None) - if not setsid: - setsid = getattr(os, 'setpgrp', None) - p = subprocess.Popen(cmdline, close_fds=True, preexec_fn=setsid) - return (p.poll() is None) - except OSError: - return False - - -class UnixBrowser(BaseBrowser): - """Parent class for all Unix browsers with remote functionality.""" - - raise_opts = None - remote_args = ['%action', '%s'] - remote_action = None - remote_action_newwin = None - remote_action_newtab = None - background = False - redirect_stdout = True - - def _invoke(self, args, remote, autoraise): - raise_opt = [] - if remote and self.raise_opts: - # use autoraise argument only for remote invocation - autoraise = 
int(autoraise) - opt = self.raise_opts[autoraise] - if opt: raise_opt = [opt] - - cmdline = [self.name] + raise_opt + args - - if remote or self.background: - inout = file(os.devnull, "r+") - else: - # for TTY browsers, we need stdin/out - inout = None - # if possible, put browser in separate process group, so - # keyboard interrupts don't affect browser as well as Python - setsid = getattr(os, 'setsid', None) - if not setsid: - setsid = getattr(os, 'setpgrp', None) - - p = subprocess.Popen(cmdline, close_fds=True, stdin=inout, - stdout=(self.redirect_stdout and inout or None), - stderr=inout, preexec_fn=setsid) - if remote: - # wait five seconds. If the subprocess is not finished, the - # remote invocation has (hopefully) started a new instance. - time.sleep(1) - rc = p.poll() - if rc is None: - time.sleep(4) - rc = p.poll() - if rc is None: - return True - # if remote call failed, open() will try direct invocation - return not rc - elif self.background: - if p.poll() is None: - return True - else: - return False - else: - return not p.wait() - - def open(self, url, new=0, autoraise=True): - if new == 0: - action = self.remote_action - elif new == 1: - action = self.remote_action_newwin - elif new == 2: - if self.remote_action_newtab is None: - action = self.remote_action_newwin - else: - action = self.remote_action_newtab - else: - raise Error("Bad 'new' parameter to open(); " + - "expected 0, 1, or 2, got %s" % new) - - args = [arg.replace("%s", url).replace("%action", action) - for arg in self.remote_args] - success = self._invoke(args, True, autoraise) - if not success: - # remote invocation failed, try straight way - args = [arg.replace("%s", url) for arg in self.args] - return self._invoke(args, False, False) - else: - return True - - -class Mozilla(UnixBrowser): - """Launcher class for Mozilla/Netscape browsers.""" - - raise_opts = ["-noraise", "-raise"] - remote_args = ['-remote', 'openURL(%s%action)'] - remote_action = "" - remote_action_newwin = 
",new-window" - remote_action_newtab = ",new-tab" - background = True - -Netscape = Mozilla - - -class Galeon(UnixBrowser): - """Launcher class for Galeon/Epiphany browsers.""" - - raise_opts = ["-noraise", ""] - remote_args = ['%action', '%s'] - remote_action = "-n" - remote_action_newwin = "-w" - background = True - - -class Chrome(UnixBrowser): - "Launcher class for Google Chrome browser." - - remote_args = ['%action', '%s'] - remote_action = "" - remote_action_newwin = "--new-window" - remote_action_newtab = "" - background = True - -Chromium = Chrome - - -class Opera(UnixBrowser): - "Launcher class for Opera browser." - - raise_opts = ["-noraise", ""] - remote_args = ['-remote', 'openURL(%s%action)'] - remote_action = "" - remote_action_newwin = ",new-window" - remote_action_newtab = ",new-page" - background = True - - -class Elinks(UnixBrowser): - "Launcher class for Elinks browsers." - - remote_args = ['-remote', 'openURL(%s%action)'] - remote_action = "" - remote_action_newwin = ",new-window" - remote_action_newtab = ",new-tab" - background = False - - # elinks doesn't like its stdout to be redirected - - # it uses redirected stdout as a signal to do -dump - redirect_stdout = False - - -class Konqueror(BaseBrowser): - """Controller for the KDE File Manager (kfm, or Konqueror). - - See the output of ``kfmclient --commands`` - for more information on the Konqueror remote-control interface. - """ - - def open(self, url, new=0, autoraise=True): - # XXX Currently I know no way to prevent KFM from opening a new win. 
- if new == 2: - action = "newTab" - else: - action = "openURL" - - devnull = file(os.devnull, "r+") - # if possible, put browser in separate process group, so - # keyboard interrupts don't affect browser as well as Python - setsid = getattr(os, 'setsid', None) - if not setsid: - setsid = getattr(os, 'setpgrp', None) - - try: - p = subprocess.Popen(["kfmclient", action, url], - close_fds=True, stdin=devnull, - stdout=devnull, stderr=devnull) - except OSError: - # fall through to next variant - pass - else: - p.wait() - # kfmclient's return code unfortunately has no meaning as it seems - return True - - try: - p = subprocess.Popen(["konqueror", "--silent", url], - close_fds=True, stdin=devnull, - stdout=devnull, stderr=devnull, - preexec_fn=setsid) - except OSError: - # fall through to next variant - pass - else: - if p.poll() is None: - # Should be running now. - return True - - try: - p = subprocess.Popen(["kfm", "-d", url], - close_fds=True, stdin=devnull, - stdout=devnull, stderr=devnull, - preexec_fn=setsid) - except OSError: - return False - else: - return (p.poll() is None) - - -class Grail(BaseBrowser): - # There should be a way to maintain a connection to Grail, but the - # Grail remote control protocol doesn't really allow that at this - # point. It probably never will! 
- def _find_grail_rc(self): - import glob - import pwd - import socket - import tempfile - tempdir = os.path.join(tempfile.gettempdir(), - ".grail-unix") - user = pwd.getpwuid(os.getuid())[0] - filename = os.path.join(tempdir, user + "-*") - maybes = glob.glob(filename) - if not maybes: - return None - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - for fn in maybes: - # need to PING each one until we find one that's live - try: - s.connect(fn) - except socket.error: - # no good; attempt to clean it out, but don't fail: - try: - os.unlink(fn) - except IOError: - pass - else: - return s - - def _remote(self, action): - s = self._find_grail_rc() - if not s: - return 0 - s.send(action) - s.close() - return 1 - - def open(self, url, new=0, autoraise=True): - if new: - ok = self._remote("LOADNEW " + url) - else: - ok = self._remote("LOAD " + url) - return ok - - -# -# Platform support for Unix -# - -# These are the right tests because all these Unix browsers require either -# a console terminal or an X display to run. 
- -def register_X_browsers(): - - # use xdg-open if around - if _iscommand("xdg-open"): - register("xdg-open", None, BackgroundBrowser("xdg-open")) - - # The default GNOME3 browser - if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gvfs-open"): - register("gvfs-open", None, BackgroundBrowser("gvfs-open")) - - # The default GNOME browser - if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gnome-open"): - register("gnome-open", None, BackgroundBrowser("gnome-open")) - - # The default KDE browser - if "KDE_FULL_SESSION" in os.environ and _iscommand("kfmclient"): - register("kfmclient", Konqueror, Konqueror("kfmclient")) - - if _iscommand("x-www-browser"): - register("x-www-browser", None, BackgroundBrowser("x-www-browser")) - - # The Mozilla/Netscape browsers - for browser in ("mozilla-firefox", "firefox", - "mozilla-firebird", "firebird", - "iceweasel", "iceape", - "seamonkey", "mozilla", "netscape"): - if _iscommand(browser): - register(browser, None, Mozilla(browser)) - - # Konqueror/kfm, the KDE browser. - if _iscommand("kfm"): - register("kfm", Konqueror, Konqueror("kfm")) - elif _iscommand("konqueror"): - register("konqueror", Konqueror, Konqueror("konqueror")) - - # Gnome's Galeon and Epiphany - for browser in ("galeon", "epiphany"): - if _iscommand(browser): - register(browser, None, Galeon(browser)) - - # Skipstone, another Gtk/Mozilla based browser - if _iscommand("skipstone"): - register("skipstone", None, BackgroundBrowser("skipstone")) - - # Google Chrome/Chromium browsers - for browser in ("google-chrome", "chrome", "chromium", "chromium-browser"): - if _iscommand(browser): - register(browser, None, Chrome(browser)) - - # Opera, quite popular - if _iscommand("opera"): - register("opera", None, Opera("opera")) - - # Next, Mosaic -- old but still in use. - if _iscommand("mosaic"): - register("mosaic", None, BackgroundBrowser("mosaic")) - - # Grail, the Python browser. Does anybody still use it? 
- if _iscommand("grail"): - register("grail", Grail, None) - -# Prefer X browsers if present -if os.environ.get("DISPLAY"): - register_X_browsers() - -# Also try console browsers -if os.environ.get("TERM"): - if _iscommand("www-browser"): - register("www-browser", None, GenericBrowser("www-browser")) - # The Links/elinks browsers - if _iscommand("links"): - register("links", None, GenericBrowser("links")) - if _iscommand("elinks"): - register("elinks", None, Elinks("elinks")) - # The Lynx browser , - if _iscommand("lynx"): - register("lynx", None, GenericBrowser("lynx")) - # The w3m browser - if _iscommand("w3m"): - register("w3m", None, GenericBrowser("w3m")) - -# -# Platform support for Windows -# - -if sys.platform[:3] == "win": - class WindowsDefault(BaseBrowser): - def open(self, url, new=0, autoraise=True): - try: - os.startfile(url) - except WindowsError: - # [Error 22] No application is associated with the specified - # file for this operation: '' - return False - else: - return True - - _tryorder = [] - _browsers = {} - - # First try to use the default Windows browser - register("windows-default", WindowsDefault) - - # Detect some common Windows browsers, fallback to IE - iexplore = os.path.join(os.environ.get("PROGRAMFILES", "C:\\Program Files"), - "Internet Explorer\\IEXPLORE.EXE") - for browser in ("firefox", "firebird", "seamonkey", "mozilla", - "netscape", "opera", iexplore): - if _iscommand(browser): - register(browser, None, BackgroundBrowser(browser)) - -# -# Platform support for MacOS -# - -if sys.platform == 'darwin': - # Adapted from patch submitted to SourceForge by Steven J. Burr - class MacOSX(BaseBrowser): - """Launcher class for Aqua browsers on Mac OS X - - Optionally specify a browser name on instantiation. Note that this - will not work for Aqua browsers if the user has moved the application - package after installation. 
- - If no browser is specified, the default browser, as specified in the - Internet System Preferences panel, will be used. - """ - def __init__(self, name): - self.name = name - - def open(self, url, new=0, autoraise=True): - assert "'" not in url - # hack for local urls - if not ':' in url: - url = 'file:'+url - - # new must be 0 or 1 - new = int(bool(new)) - if self.name == "default": - # User called open, open_new or get without a browser parameter - script = 'open location "%s"' % url.replace('"', '%22') # opens in default browser - else: - # User called get and chose a browser - if self.name == "OmniWeb": - toWindow = "" - else: - # Include toWindow parameter of OpenURL command for browsers - # that support it. 0 == new window; -1 == existing - toWindow = "toWindow %d" % (new - 1) - cmd = 'OpenURL "%s"' % url.replace('"', '%22') - script = '''tell application "%s" - activate - %s %s - end tell''' % (self.name, cmd, toWindow) - # Open pipe to AppleScript through osascript command - osapipe = os.popen("osascript", "w") - if osapipe is None: - return False - # Write script to osascript's stdin - osapipe.write(script) - rc = osapipe.close() - return not rc - - class MacOSXOSAScript(BaseBrowser): - def __init__(self, name): - self._name = name - - def open(self, url, new=0, autoraise=True): - if self._name == 'default': - script = 'open location "%s"' % url.replace('"', '%22') # opens in default browser - else: - script = ''' - tell application "%s" - activate - open location "%s" - end - '''%(self._name, url.replace('"', '%22')) - - osapipe = os.popen("osascript", "w") - if osapipe is None: - return False - - osapipe.write(script) - rc = osapipe.close() - return not rc - - - # Don't clear _tryorder or _browsers since OS X can use above Unix support - # (but we prefer using the OS X specific stuff) - register("safari", None, MacOSXOSAScript('safari'), -1) - register("firefox", None, MacOSXOSAScript('firefox'), -1) - register("chrome", None, 
MacOSXOSAScript('chrome'), -1) - register("MacOSX", None, MacOSXOSAScript('default'), -1) - - -# -# Platform support for OS/2 -# - -if sys.platform[:3] == "os2" and _iscommand("netscape"): - _tryorder = [] - _browsers = {} - register("os2netscape", None, - GenericBrowser(["start", "netscape", "%s"]), -1) - - -# OK, now that we know what the default preference orders for each -# platform are, allow user to override them with the BROWSER variable. -if "BROWSER" in os.environ: - _userchoices = os.environ["BROWSER"].split(os.pathsep) - _userchoices.reverse() - - # Treat choices in same way as if passed into get() but do register - # and prepend to _tryorder - for cmdline in _userchoices: - if cmdline != '': - cmd = _synthesize(cmdline, -1) - if cmd[1] is None: - register(cmdline, None, GenericBrowser(cmdline), -1) - cmdline = None # to make del work if _userchoices was empty - del cmdline - del _userchoices - -# what to do if _tryorder is now empty? - - -def main(): - import getopt - usage = """Usage: %s [-n | -t] url - -n: open new window - -t: open new tab""" % sys.argv[0] - try: - opts, args = getopt.getopt(sys.argv[1:], 'ntd') - except getopt.error, msg: - print >>sys.stderr, msg - print >>sys.stderr, usage - sys.exit(1) - new_win = 0 - for o, a in opts: - if o == '-n': new_win = 1 - elif o == '-t': new_win = 2 - if len(args) != 1: - print >>sys.stderr, usage - sys.exit(1) - - url = args[0] - open(url, new_win) - - print "\a" - -if __name__ == "__main__": - main() diff --git a/python/Lib/whichdb.py b/python/Lib/whichdb.py deleted file mode 100755 index 9071430b15..0000000000 --- a/python/Lib/whichdb.py +++ /dev/null @@ -1,117 +0,0 @@ -# !/usr/bin/env python -"""Guess which db package to use to open a db file.""" - -import os -import struct -import sys - -try: - import dbm - _dbmerror = dbm.error -except ImportError: - dbm = None - # just some sort of valid exception which might be raised in the - # dbm test - _dbmerror = IOError - -def whichdb(filename): - """Guess 
which db package to use to open a db file. - - Return values: - - - None if the database file can't be read; - - empty string if the file can be read but can't be recognized - - the module name (e.g. "dbm" or "gdbm") if recognized. - - Importing the given module may still fail, and opening the - database using that module may still fail. - """ - - # Check for dbm first -- this has a .pag and a .dir file - try: - f = open(filename + os.extsep + "pag", "rb") - f.close() - # dbm linked with gdbm on OS/2 doesn't have .dir file - if not (dbm.library == "GNU gdbm" and sys.platform == "os2emx"): - f = open(filename + os.extsep + "dir", "rb") - f.close() - return "dbm" - except IOError: - # some dbm emulations based on Berkeley DB generate a .db file - # some do not, but they should be caught by the dbhash checks - try: - f = open(filename + os.extsep + "db", "rb") - f.close() - # guarantee we can actually open the file using dbm - # kind of overkill, but since we are dealing with emulations - # it seems like a prudent step - if dbm is not None: - d = dbm.open(filename) - d.close() - return "dbm" - except (IOError, _dbmerror): - pass - - # Check for dumbdbm next -- this has a .dir and a .dat file - try: - # First check for presence of files - os.stat(filename + os.extsep + "dat") - size = os.stat(filename + os.extsep + "dir").st_size - # dumbdbm files with no keys are empty - if size == 0: - return "dumbdbm" - f = open(filename + os.extsep + "dir", "rb") - try: - if f.read(1) in ("'", '"'): - return "dumbdbm" - finally: - f.close() - except (OSError, IOError): - pass - - # See if the file exists, return None if not - try: - f = open(filename, "rb") - except IOError: - return None - - # Read the start of the file -- the magic number - s16 = f.read(16) - f.close() - s = s16[0:4] - - # Return "" if not at least 4 bytes - if len(s) != 4: - return "" - - # Convert to 4-byte int in native byte order -- return "" if impossible - try: - (magic,) = struct.unpack("=l", s) - except 
struct.error: - return "" - - # Check for GNU dbm - if magic in (0x13579ace, 0x13579acd, 0x13579acf): - return "gdbm" - - # Check for old Berkeley db hash file format v2 - if magic in (0x00061561, 0x61150600): - return "bsddb185" - - # Later versions of Berkeley db hash file have a 12-byte pad in - # front of the file type - try: - (magic,) = struct.unpack("=l", s16[-4:]) - except struct.error: - return "" - - # Check for BSD hash - if magic in (0x00061561, 0x61150600): - return "dbhash" - - # Unknown - return "" - -if __name__ == "__main__": - for filename in sys.argv[1:]: - print whichdb(filename) or "UNKNOWN", filename diff --git a/python/Lib/wsgiref/__init__.py b/python/Lib/wsgiref/__init__.py deleted file mode 100755 index 46c579f8ec..0000000000 --- a/python/Lib/wsgiref/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -"""wsgiref -- a WSGI (PEP 333) Reference Library - -Current Contents: - -* util -- Miscellaneous useful functions and wrappers - -* headers -- Manage response headers - -* handlers -- base classes for server/gateway implementations - -* simple_server -- a simple BaseHTTPServer that supports WSGI - -* validate -- validation wrapper that sits between an app and a server - to detect errors in either - -To-Do: - -* cgi_gateway -- Run WSGI apps under CGI (pending a deployment standard) - -* cgi_wrapper -- Run CGI apps under WSGI - -* router -- a simple middleware component that handles URL traversal -""" diff --git a/python/Lib/wsgiref/handlers.py b/python/Lib/wsgiref/handlers.py deleted file mode 100755 index 8cb57e223a..0000000000 --- a/python/Lib/wsgiref/handlers.py +++ /dev/null @@ -1,450 +0,0 @@ -"""Base classes for server/gateway implementations""" - -from types import StringType -from util import FileWrapper, guess_scheme, is_hop_by_hop -from headers import Headers - -import sys, os, time - -__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler'] - -try: - dict -except NameError: - def dict(items): - d = {} - for k,v in items: - 
d[k] = v - return d - -# Uncomment for 2.2 compatibility. -#try: -# True -# False -#except NameError: -# True = not None -# False = not True - - -# Weekday and month names for HTTP date/time formatting; always English! -_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -_monthname = [None, # Dummy so we can use 1-based month numbers - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] - -def format_date_time(timestamp): - year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) - return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( - _weekdayname[wd], day, _monthname[month], year, hh, mm, ss - ) - - -class BaseHandler: - """Manage the invocation of a WSGI application""" - - # Configuration parameters; can override per-subclass or per-instance - wsgi_version = (1,0) - wsgi_multithread = True - wsgi_multiprocess = True - wsgi_run_once = False - - origin_server = True # We are transmitting direct to client - http_version = "1.0" # Version that should be used for response - server_software = None # String name of server software, if any - - # os_environ is used to supply configuration from the OS environment: - # by default it's a copy of 'os.environ' as of import time, but you can - # override this in e.g. your __init__ method. - os_environ = dict(os.environ.items()) - - # Collaborator classes - wsgi_file_wrapper = FileWrapper # set to None to disable - headers_class = Headers # must be a Headers-like class - - # Error handling (also per-subclass or per-instance) - traceback_limit = None # Print entire traceback to self.get_stderr() - error_status = "500 Internal Server Error" - error_headers = [('Content-Type','text/plain')] - error_body = "A server error occurred. Please contact the administrator." 
- - # State variables (don't mess with these) - status = result = None - headers_sent = False - headers = None - bytes_sent = 0 - - def run(self, application): - """Invoke the application""" - # Note to self: don't move the close()! Asynchronous servers shouldn't - # call close() from finish_response(), so if you close() anywhere but - # the double-error branch here, you'll break asynchronous servers by - # prematurely closing. Async servers must return from 'run()' without - # closing if there might still be output to iterate over. - try: - self.setup_environ() - self.result = application(self.environ, self.start_response) - self.finish_response() - except: - try: - self.handle_error() - except: - # If we get an error handling an error, just give up already! - self.close() - raise # ...and let the actual server figure it out. - - - def setup_environ(self): - """Set up the environment for one request""" - - env = self.environ = self.os_environ.copy() - self.add_cgi_vars() - - env['wsgi.input'] = self.get_stdin() - env['wsgi.errors'] = self.get_stderr() - env['wsgi.version'] = self.wsgi_version - env['wsgi.run_once'] = self.wsgi_run_once - env['wsgi.url_scheme'] = self.get_scheme() - env['wsgi.multithread'] = self.wsgi_multithread - env['wsgi.multiprocess'] = self.wsgi_multiprocess - - if self.wsgi_file_wrapper is not None: - env['wsgi.file_wrapper'] = self.wsgi_file_wrapper - - if self.origin_server and self.server_software: - env.setdefault('SERVER_SOFTWARE',self.server_software) - - - def finish_response(self): - """Send any iterable data, then close self and the iterable - - Subclasses intended for use in asynchronous servers will - want to redefine this method, such that it sets up callbacks - in the event loop to iterate over the data, and to call - 'self.close()' once the response is finished. 
- """ - try: - if not self.result_is_file() or not self.sendfile(): - for data in self.result: - self.write(data) - self.finish_content() - finally: - self.close() - - - def get_scheme(self): - """Return the URL scheme being used""" - return guess_scheme(self.environ) - - - def set_content_length(self): - """Compute Content-Length or switch to chunked encoding if possible""" - try: - blocks = len(self.result) - except (TypeError,AttributeError,NotImplementedError): - pass - else: - if blocks==1: - self.headers['Content-Length'] = str(self.bytes_sent) - return - # XXX Try for chunked encoding if origin server and client is 1.1 - - - def cleanup_headers(self): - """Make any necessary header changes or defaults - - Subclasses can extend this to add other defaults. - """ - if 'Content-Length' not in self.headers: - self.set_content_length() - - def start_response(self, status, headers,exc_info=None): - """'start_response()' callable as specified by PEP 333""" - - if exc_info: - try: - if self.headers_sent: - # Re-raise original exception if headers sent - raise exc_info[0], exc_info[1], exc_info[2] - finally: - exc_info = None # avoid dangling circular ref - elif self.headers is not None: - raise AssertionError("Headers already set!") - - assert type(status) is StringType,"Status must be a string" - assert len(status)>=4,"Status must be at least 4 characters" - assert int(status[:3]),"Status message must begin w/3-digit code" - assert status[3]==" ", "Status message must have a space after code" - if __debug__: - for name,val in headers: - assert type(name) is StringType,"Header names must be strings" - assert type(val) is StringType,"Header values must be strings" - assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed" - self.status = status - self.headers = self.headers_class(headers) - return self.write - - - def send_preamble(self): - """Transmit version/status/date/server, via self._write()""" - if self.origin_server: - if self.client_is_modern(): - 
self._write('HTTP/%s %s\r\n' % (self.http_version,self.status)) - if 'Date' not in self.headers: - self._write( - 'Date: %s\r\n' % format_date_time(time.time()) - ) - if self.server_software and 'Server' not in self.headers: - self._write('Server: %s\r\n' % self.server_software) - else: - self._write('Status: %s\r\n' % self.status) - - def write(self, data): - """'write()' callable as specified by PEP 333""" - - assert type(data) is StringType,"write() argument must be string" - - if not self.status: - raise AssertionError("write() before start_response()") - - elif not self.headers_sent: - # Before the first output, send the stored headers - self.bytes_sent = len(data) # make sure we know content-length - self.send_headers() - else: - self.bytes_sent += len(data) - - # XXX check Content-Length and truncate if too many bytes written? - self._write(data) - self._flush() - - - def sendfile(self): - """Platform-specific file transmission - - Override this method in subclasses to support platform-specific - file transmission. It is only called if the application's - return iterable ('self.result') is an instance of - 'self.wsgi_file_wrapper'. - - This method should return a true value if it was able to actually - transmit the wrapped file-like object using a platform-specific - approach. It should return a false value if normal iteration - should be used instead. An exception can be raised to indicate - that transmission was attempted, but failed. - - NOTE: this method should call 'self.send_headers()' if - 'self.headers_sent' is false and it is going to attempt direct - transmission of the file. 
- """ - return False # No platform-specific transmission by default - - - def finish_content(self): - """Ensure headers and content have both been sent""" - if not self.headers_sent: - # Only zero Content-Length if not set by the application (so - # that HEAD requests can be satisfied properly, see #3839) - self.headers.setdefault('Content-Length', "0") - self.send_headers() - else: - pass # XXX check if content-length was too short? - - def close(self): - """Close the iterable (if needed) and reset all instance vars - - Subclasses may want to also drop the client connection. - """ - try: - if hasattr(self.result,'close'): - self.result.close() - finally: - self.result = self.headers = self.status = self.environ = None - self.bytes_sent = 0; self.headers_sent = False - - - def send_headers(self): - """Transmit headers to the client, via self._write()""" - self.cleanup_headers() - self.headers_sent = True - if not self.origin_server or self.client_is_modern(): - self.send_preamble() - self._write(str(self.headers)) - - - def result_is_file(self): - """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'""" - wrapper = self.wsgi_file_wrapper - return wrapper is not None and isinstance(self.result,wrapper) - - - def client_is_modern(self): - """True if client can accept status and headers""" - return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9' - - - def log_exception(self,exc_info): - """Log the 'exc_info' tuple in the server log - - Subclasses may override to retarget the output or change its format. 
- """ - try: - from traceback import print_exception - stderr = self.get_stderr() - print_exception( - exc_info[0], exc_info[1], exc_info[2], - self.traceback_limit, stderr - ) - stderr.flush() - finally: - exc_info = None - - def handle_error(self): - """Log current error, and send error output to client if possible""" - self.log_exception(sys.exc_info()) - if not self.headers_sent: - self.result = self.error_output(self.environ, self.start_response) - self.finish_response() - # XXX else: attempt advanced recovery techniques for HTML or text? - - def error_output(self, environ, start_response): - """WSGI mini-app to create error output - - By default, this just uses the 'error_status', 'error_headers', - and 'error_body' attributes to generate an output page. It can - be overridden in a subclass to dynamically generate diagnostics, - choose an appropriate message for the user's preferred language, etc. - - Note, however, that it's not recommended from a security perspective to - spit out diagnostics to any old user; ideally, you should have to do - something special to enable diagnostic output, which is why we don't - include any here! - """ - start_response(self.error_status,self.error_headers[:],sys.exc_info()) - return [self.error_body] - - - # Pure abstract methods; *must* be overridden in subclasses - - def _write(self,data): - """Override in subclass to buffer data for send to client - - It's okay if this method actually transmits the data; BaseHandler - just separates write and flush operations for greater efficiency - when the underlying system actually has such a distinction. - """ - raise NotImplementedError - - def _flush(self): - """Override in subclass to force sending of recent '_write()' calls - - It's okay if this method is a no-op (i.e., if '_write()' actually - sends the data. 
- """ - raise NotImplementedError - - def get_stdin(self): - """Override in subclass to return suitable 'wsgi.input'""" - raise NotImplementedError - - def get_stderr(self): - """Override in subclass to return suitable 'wsgi.errors'""" - raise NotImplementedError - - def add_cgi_vars(self): - """Override in subclass to insert CGI variables in 'self.environ'""" - raise NotImplementedError - - -class SimpleHandler(BaseHandler): - """Handler that's just initialized with streams, environment, etc. - - This handler subclass is intended for synchronous HTTP/1.0 origin servers, - and handles sending the entire response output, given the correct inputs. - - Usage:: - - handler = SimpleHandler( - inp,out,err,env, multithread=False, multiprocess=True - ) - handler.run(app)""" - - def __init__(self,stdin,stdout,stderr,environ, - multithread=True, multiprocess=False - ): - self.stdin = stdin - self.stdout = stdout - self.stderr = stderr - self.base_env = environ - self.wsgi_multithread = multithread - self.wsgi_multiprocess = multiprocess - - def get_stdin(self): - return self.stdin - - def get_stderr(self): - return self.stderr - - def add_cgi_vars(self): - self.environ.update(self.base_env) - - def _write(self,data): - self.stdout.write(data) - self._write = self.stdout.write - - def _flush(self): - self.stdout.flush() - self._flush = self.stdout.flush - - -class BaseCGIHandler(SimpleHandler): - - """CGI-like systems using input/output/error streams and environ mapping - - Usage:: - - handler = BaseCGIHandler(inp,out,err,env) - handler.run(app) - - This handler class is useful for gateway protocols like ReadyExec and - FastCGI, that have usable input/output/error streams and an environment - mapping. It's also the base class for CGIHandler, which just uses - sys.stdin, os.environ, and so on. 
- - The constructor also takes keyword arguments 'multithread' and - 'multiprocess' (defaulting to 'True' and 'False' respectively) to control - the configuration sent to the application. It sets 'origin_server' to - False (to enable CGI-like output), and assumes that 'wsgi.run_once' is - False. - """ - - origin_server = False - - -class CGIHandler(BaseCGIHandler): - - """CGI-based invocation via sys.stdin/stdout/stderr and os.environ - - Usage:: - - CGIHandler().run(app) - - The difference between this class and BaseCGIHandler is that it always - uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and - 'wsgi.multiprocess' of 'True'. It does not take any initialization - parameters, but always uses 'sys.stdin', 'os.environ', and friends. - - If you need to override any of these parameters, use BaseCGIHandler - instead. - """ - - wsgi_run_once = True - # Do not allow os.environ to leak between requests in Google App Engine - # and other multi-run CGI use cases. This is not easily testable. - # See http://bugs.python.org/issue7250 - os_environ = {} - - def __init__(self): - BaseCGIHandler.__init__( - self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()), - multithread=False, multiprocess=True - ) diff --git a/python/Lib/wsgiref/headers.py b/python/Lib/wsgiref/headers.py deleted file mode 100755 index 5a95e84c34..0000000000 --- a/python/Lib/wsgiref/headers.py +++ /dev/null @@ -1,169 +0,0 @@ -"""Manage HTTP Response Headers - -Much of this module is red-handedly pilfered from email.message in the stdlib, -so portions are Copyright (C) 2001,2002 Python Software Foundation, and were -written by Barry Warsaw. -""" - -from types import ListType, TupleType - -# Regular expression that matches `special' characters in parameters, the -# existence of which force quoting of the parameter value. 
-import re -tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') - -def _formatparam(param, value=None, quote=1): - """Convenience function to format and return a key=value pair. - - This will quote the value if needed or if quote is true. - """ - if value is not None and len(value) > 0: - if quote or tspecials.search(value): - value = value.replace('\\', '\\\\').replace('"', r'\"') - return '%s="%s"' % (param, value) - else: - return '%s=%s' % (param, value) - else: - return param - - -class Headers: - - """Manage a collection of HTTP response headers""" - - def __init__(self,headers): - if type(headers) is not ListType: - raise TypeError("Headers must be a list of name/value tuples") - self._headers = headers - - def __len__(self): - """Return the total number of headers, including duplicates.""" - return len(self._headers) - - def __setitem__(self, name, val): - """Set the value of a header.""" - del self[name] - self._headers.append((name, val)) - - def __delitem__(self,name): - """Delete all occurrences of a header, if present. - - Does *not* raise an exception if the header is missing. - """ - name = name.lower() - self._headers[:] = [kv for kv in self._headers if kv[0].lower() != name] - - def __getitem__(self,name): - """Get the first header value for 'name' - - Return None if the header is missing instead of raising an exception. - - Note that if the header appeared multiple times, the first exactly which - occurrence gets returned is undefined. Use getall() to get all - the values matching a header field name. - """ - return self.get(name) - - def has_key(self, name): - """Return true if the message contains the header.""" - return self.get(name) is not None - - __contains__ = has_key - - - def get_all(self, name): - """Return a list of all the values for the named field. - - These will be sorted in the order they appeared in the original header - list or were added to this instance, and may contain duplicates. 
Any - fields deleted and re-inserted are always appended to the header list. - If no fields exist with the given name, returns an empty list. - """ - name = name.lower() - return [kv[1] for kv in self._headers if kv[0].lower()==name] - - - def get(self,name,default=None): - """Get the first header value for 'name', or return 'default'""" - name = name.lower() - for k,v in self._headers: - if k.lower()==name: - return v - return default - - - def keys(self): - """Return a list of all the header field names. - - These will be sorted in the order they appeared in the original header - list, or were added to this instance, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [k for k, v in self._headers] - - def values(self): - """Return a list of all header values. - - These will be sorted in the order they appeared in the original header - list, or were added to this instance, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [v for k, v in self._headers] - - def items(self): - """Get all the header fields and values. - - These will be sorted in the order they were in the original header - list, or were added to this instance, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. 
- """ - return self._headers[:] - - def __repr__(self): - return "Headers(%r)" % self._headers - - def __str__(self): - """str() returns the formatted headers, complete with end line, - suitable for direct HTTP transmission.""" - return '\r\n'.join(["%s: %s" % kv for kv in self._headers]+['','']) - - def setdefault(self,name,value): - """Return first matching header value for 'name', or 'value' - - If there is no header named 'name', add a new header with name 'name' - and value 'value'.""" - result = self.get(name) - if result is None: - self._headers.append((name,value)) - return value - else: - return result - - def add_header(self, _name, _value, **_params): - """Extended header setting. - - _name is the header field to add. keyword arguments can be used to set - additional parameters for the header field, with underscores converted - to dashes. Normally the parameter will be added as key="value" unless - value is None, in which case only the key will be added. - - Example: - - h.add_header('content-disposition', 'attachment', filename='bud.gif') - - Note that unlike the corresponding 'email.message' method, this does - *not* handle '(charset, language, value)' tuples: all values must be - strings or None. - """ - parts = [] - if _value is not None: - parts.append(_value) - for k, v in _params.items(): - if v is None: - parts.append(k.replace('_', '-')) - else: - parts.append(_formatparam(k.replace('_', '-'), v)) - self._headers.append((_name, "; ".join(parts))) diff --git a/python/Lib/wsgiref/simple_server.py b/python/Lib/wsgiref/simple_server.py deleted file mode 100755 index 35b98d10d8..0000000000 --- a/python/Lib/wsgiref/simple_server.py +++ /dev/null @@ -1,163 +0,0 @@ -"""BaseHTTPServer that implements the Python WSGI protocol (PEP 333, rev 1.21) - -This is both an example of how WSGI can be implemented, and a basis for running -simple web applications on a local machine, such as might be done when testing -or debugging an application. 
It has not been reviewed for security issues, -however, and we strongly recommend that you use a "real" web server for -production use. - -For example usage, see the 'if __name__=="__main__"' block at the end of the -module. See also the BaseHTTPServer module docs for other API information. -""" - -from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -import urllib, sys -from wsgiref.handlers import SimpleHandler - -__version__ = "0.1" -__all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server'] - - -server_version = "WSGIServer/" + __version__ -sys_version = "Python/" + sys.version.split()[0] -software_version = server_version + ' ' + sys_version - - -class ServerHandler(SimpleHandler): - - server_software = software_version - - def close(self): - try: - self.request_handler.log_request( - self.status.split(' ',1)[0], self.bytes_sent - ) - finally: - SimpleHandler.close(self) - - - -class WSGIServer(HTTPServer): - - """BaseHTTPServer that implements the Python WSGI protocol""" - - application = None - - def server_bind(self): - """Override server_bind to store the server name.""" - HTTPServer.server_bind(self) - self.setup_environ() - - def setup_environ(self): - # Set up base environment - env = self.base_environ = {} - env['SERVER_NAME'] = self.server_name - env['GATEWAY_INTERFACE'] = 'CGI/1.1' - env['SERVER_PORT'] = str(self.server_port) - env['REMOTE_HOST']='' - env['CONTENT_LENGTH']='' - env['SCRIPT_NAME'] = '' - - def get_app(self): - return self.application - - def set_app(self,application): - self.application = application - - - -class WSGIRequestHandler(BaseHTTPRequestHandler): - - server_version = "WSGIServer/" + __version__ - - def get_environ(self): - env = self.server.base_environ.copy() - env['SERVER_PROTOCOL'] = self.request_version - env['REQUEST_METHOD'] = self.command - if '?' 
in self.path: - path,query = self.path.split('?',1) - else: - path,query = self.path,'' - - env['PATH_INFO'] = urllib.unquote(path) - env['QUERY_STRING'] = query - - host = self.address_string() - if host != self.client_address[0]: - env['REMOTE_HOST'] = host - env['REMOTE_ADDR'] = self.client_address[0] - - if self.headers.typeheader is None: - env['CONTENT_TYPE'] = self.headers.type - else: - env['CONTENT_TYPE'] = self.headers.typeheader - - length = self.headers.getheader('content-length') - if length: - env['CONTENT_LENGTH'] = length - - for h in self.headers.headers: - k,v = h.split(':',1) - k=k.replace('-','_').upper(); v=v.strip() - if k in env: - continue # skip content length, type,etc. - if 'HTTP_'+k in env: - env['HTTP_'+k] += ','+v # comma-separate multiple headers - else: - env['HTTP_'+k] = v - return env - - def get_stderr(self): - return sys.stderr - - def handle(self): - """Handle a single HTTP request""" - - self.raw_requestline = self.rfile.readline(65537) - if len(self.raw_requestline) > 65536: - self.requestline = '' - self.request_version = '' - self.command = '' - self.send_error(414) - return - - if not self.parse_request(): # An error code has been sent, just exit - return - - handler = ServerHandler( - self.rfile, self.wfile, self.get_stderr(), self.get_environ() - ) - handler.request_handler = self # backpointer for logging - handler.run(self.server.get_app()) - - - -def demo_app(environ,start_response): - from StringIO import StringIO - stdout = StringIO() - print >>stdout, "Hello world!" 
- print >>stdout - h = environ.items(); h.sort() - for k,v in h: - print >>stdout, k,'=', repr(v) - start_response("200 OK", [('Content-Type','text/plain')]) - return [stdout.getvalue()] - - -def make_server( - host, port, app, server_class=WSGIServer, handler_class=WSGIRequestHandler -): - """Create a new WSGI server listening on `host` and `port` for `app`""" - server = server_class((host, port), handler_class) - server.set_app(app) - return server - - -if __name__ == '__main__': - httpd = make_server('', 8000, demo_app) - sa = httpd.socket.getsockname() - print "Serving HTTP on", sa[0], "port", sa[1], "..." - import webbrowser - webbrowser.open('http://localhost:8000/xyz?abc') - httpd.handle_request() # serve one request, then exit - httpd.server_close() diff --git a/python/Lib/wsgiref/util.py b/python/Lib/wsgiref/util.py deleted file mode 100755 index 194b187a4d..0000000000 --- a/python/Lib/wsgiref/util.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Miscellaneous WSGI-related Utilities""" - -import posixpath - -__all__ = [ - 'FileWrapper', 'guess_scheme', 'application_uri', 'request_uri', - 'shift_path_info', 'setup_testing_defaults', -] - - -class FileWrapper: - """Wrapper to convert file-like objects to iterables""" - - def __init__(self, filelike, blksize=8192): - self.filelike = filelike - self.blksize = blksize - if hasattr(filelike,'close'): - self.close = filelike.close - - def __getitem__(self,key): - data = self.filelike.read(self.blksize) - if data: - return data - raise IndexError - - def __iter__(self): - return self - - def next(self): - data = self.filelike.read(self.blksize) - if data: - return data - raise StopIteration - -def guess_scheme(environ): - """Return a guess for whether 'wsgi.url_scheme' should be 'http' or 'https' - """ - if environ.get("HTTPS") in ('yes','on','1'): - return 'https' - else: - return 'http' - -def application_uri(environ): - """Return the application's base URI (no PATH_INFO or QUERY_STRING)""" - url = 
environ['wsgi.url_scheme']+'://' - from urllib import quote - - if environ.get('HTTP_HOST'): - url += environ['HTTP_HOST'] - else: - url += environ['SERVER_NAME'] - - if environ['wsgi.url_scheme'] == 'https': - if environ['SERVER_PORT'] != '443': - url += ':' + environ['SERVER_PORT'] - else: - if environ['SERVER_PORT'] != '80': - url += ':' + environ['SERVER_PORT'] - - url += quote(environ.get('SCRIPT_NAME') or '/') - return url - -def request_uri(environ, include_query=1): - """Return the full request URI, optionally including the query string""" - url = application_uri(environ) - from urllib import quote - path_info = quote(environ.get('PATH_INFO',''),safe='/;=,') - if not environ.get('SCRIPT_NAME'): - url += path_info[1:] - else: - url += path_info - if include_query and environ.get('QUERY_STRING'): - url += '?' + environ['QUERY_STRING'] - return url - -def shift_path_info(environ): - """Shift a name from PATH_INFO to SCRIPT_NAME, returning it - - If there are no remaining path segments in PATH_INFO, return None. - Note: 'environ' is modified in-place; use a copy if you need to keep - the original PATH_INFO or SCRIPT_NAME. - - Note: when PATH_INFO is just a '/', this returns '' and appends a trailing - '/' to SCRIPT_NAME, even though empty path segments are normally ignored, - and SCRIPT_NAME doesn't normally end in a '/'. This is intentional - behavior, to ensure that an application can tell the difference between - '/x' and '/x/' when traversing to objects. 
- """ - path_info = environ.get('PATH_INFO','') - if not path_info: - return None - - path_parts = path_info.split('/') - path_parts[1:-1] = [p for p in path_parts[1:-1] if p and p != '.'] - name = path_parts[1] - del path_parts[1] - - script_name = environ.get('SCRIPT_NAME','') - script_name = posixpath.normpath(script_name+'/'+name) - if script_name.endswith('/'): - script_name = script_name[:-1] - if not name and not script_name.endswith('/'): - script_name += '/' - - environ['SCRIPT_NAME'] = script_name - environ['PATH_INFO'] = '/'.join(path_parts) - - # Special case: '/.' on PATH_INFO doesn't get stripped, - # because we don't strip the last element of PATH_INFO - # if there's only one path part left. Instead of fixing this - # above, we fix it here so that PATH_INFO gets normalized to - # an empty string in the environ. - if name=='.': - name = None - return name - -def setup_testing_defaults(environ): - """Update 'environ' with trivial defaults for testing purposes - - This adds various parameters required for WSGI, including HTTP_HOST, - SERVER_NAME, SERVER_PORT, REQUEST_METHOD, SCRIPT_NAME, PATH_INFO, - and all of the wsgi.* variables. It only supplies default values, - and does not replace any existing settings for these variables. - - This routine is intended to make it easier for unit tests of WSGI - servers and applications to set up dummy environments. It should *not* - be used by actual WSGI servers or applications, since the data is fake! 
- """ - - environ.setdefault('SERVER_NAME','127.0.0.1') - environ.setdefault('SERVER_PROTOCOL','HTTP/1.0') - - environ.setdefault('HTTP_HOST',environ['SERVER_NAME']) - environ.setdefault('REQUEST_METHOD','GET') - - if 'SCRIPT_NAME' not in environ and 'PATH_INFO' not in environ: - environ.setdefault('SCRIPT_NAME','') - environ.setdefault('PATH_INFO','/') - - environ.setdefault('wsgi.version', (1,0)) - environ.setdefault('wsgi.run_once', 0) - environ.setdefault('wsgi.multithread', 0) - environ.setdefault('wsgi.multiprocess', 0) - - from StringIO import StringIO - environ.setdefault('wsgi.input', StringIO("")) - environ.setdefault('wsgi.errors', StringIO()) - environ.setdefault('wsgi.url_scheme',guess_scheme(environ)) - - if environ['wsgi.url_scheme']=='http': - environ.setdefault('SERVER_PORT', '80') - elif environ['wsgi.url_scheme']=='https': - environ.setdefault('SERVER_PORT', '443') - - - -_hoppish = { - 'connection':1, 'keep-alive':1, 'proxy-authenticate':1, - 'proxy-authorization':1, 'te':1, 'trailers':1, 'transfer-encoding':1, - 'upgrade':1 -}.__contains__ - -def is_hop_by_hop(header_name): - """Return true if 'header_name' is an HTTP/1.1 "Hop-by-Hop" header""" - return _hoppish(header_name.lower()) diff --git a/python/Lib/wsgiref/validate.py b/python/Lib/wsgiref/validate.py deleted file mode 100755 index c3278120b0..0000000000 --- a/python/Lib/wsgiref/validate.py +++ /dev/null @@ -1,432 +0,0 @@ -# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) -# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php -# Also licenced under the Apache License, 2.0: http://opensource.org/licenses/apache2.0.php -# Licensed to PSF under a Contributor Agreement -""" -Middleware to check for obedience to the WSGI specification. - -Some of the things this checks: - -* Signature of the application and start_response (including that - keyword arguments are not used). 
- -* Environment checks: - - - Environment is a dictionary (and not a subclass). - - - That all the required keys are in the environment: REQUEST_METHOD, - SERVER_NAME, SERVER_PORT, wsgi.version, wsgi.input, wsgi.errors, - wsgi.multithread, wsgi.multiprocess, wsgi.run_once - - - That HTTP_CONTENT_TYPE and HTTP_CONTENT_LENGTH are not in the - environment (these headers should appear as CONTENT_LENGTH and - CONTENT_TYPE). - - - Warns if QUERY_STRING is missing, as the cgi module acts - unpredictably in that case. - - - That CGI-style variables (that don't contain a .) have - (non-unicode) string values - - - That wsgi.version is a tuple - - - That wsgi.url_scheme is 'http' or 'https' (@@: is this too - restrictive?) - - - Warns if the REQUEST_METHOD is not known (@@: probably too - restrictive). - - - That SCRIPT_NAME and PATH_INFO are empty or start with / - - - That at least one of SCRIPT_NAME or PATH_INFO are set. - - - That CONTENT_LENGTH is a positive integer. - - - That SCRIPT_NAME is not '/' (it should be '', and PATH_INFO should - be '/'). - - - That wsgi.input has the methods read, readline, readlines, and - __iter__ - - - That wsgi.errors has the methods flush, write, writelines - -* The status is a string, contains a space, starts with an integer, - and that integer is in range (> 100). - -* That the headers is a list (not a subclass, not another kind of - sequence). - -* That the items of the headers are tuples of strings. - -* That there is no 'status' header (that is used in CGI, but not in - WSGI). - -* That the headers don't contain newlines or colons, end in _ or -, or - contain characters codes below 037. - -* That Content-Type is given if there is content (CGI often has a - default content type, but WSGI does not). - -* That no Content-Type is given when there is no content (@@: is this - too restrictive?) - -* That the exc_info argument to start_response is a tuple or None. 
- -* That all calls to the writer are with strings, and no other methods - on the writer are accessed. - -* That wsgi.input is used properly: - - - .read() is called with zero or one argument - - - That it returns a string - - - That readline, readlines, and __iter__ return strings - - - That .close() is not called - - - No other methods are provided - -* That wsgi.errors is used properly: - - - .write() and .writelines() is called with a string - - - That .close() is not called, and no other methods are provided. - -* The response iterator: - - - That it is not a string (it should be a list of a single string; a - string will work, but perform horribly). - - - That .next() returns a string - - - That the iterator is not iterated over until start_response has - been called (that can signal either a server or application - error). - - - That .close() is called (doesn't raise exception, only prints to - sys.stderr, because we only know it isn't called when the object - is garbage collected). -""" -__all__ = ['validator'] - - -import re -import sys -from types import DictType, StringType, TupleType, ListType -import warnings - -header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$') -bad_header_value_re = re.compile(r'[\000-\037]') - -class WSGIWarning(Warning): - """ - Raised in response to WSGI-spec-related warnings - """ - -def assert_(cond, *args): - if not cond: - raise AssertionError(*args) - -def validator(application): - - """ - When applied between a WSGI server and a WSGI application, this - middleware will check for WSGI compliancy on a number of levels. - This middleware does not modify the request or response in any - way, but will raise an AssertionError if anything seems off - (except for a failure to close the application iterator, which - will be printed to stderr -- there's no way to raise an exception - at that point). 
- """ - - def lint_app(*args, **kw): - assert_(len(args) == 2, "Two arguments required") - assert_(not kw, "No keyword arguments allowed") - environ, start_response = args - - check_environ(environ) - - # We use this to check if the application returns without - # calling start_response: - start_response_started = [] - - def start_response_wrapper(*args, **kw): - assert_(len(args) == 2 or len(args) == 3, ( - "Invalid number of arguments: %s" % (args,))) - assert_(not kw, "No keyword arguments allowed") - status = args[0] - headers = args[1] - if len(args) == 3: - exc_info = args[2] - else: - exc_info = None - - check_status(status) - check_headers(headers) - check_content_type(status, headers) - check_exc_info(exc_info) - - start_response_started.append(None) - return WriteWrapper(start_response(*args)) - - environ['wsgi.input'] = InputWrapper(environ['wsgi.input']) - environ['wsgi.errors'] = ErrorWrapper(environ['wsgi.errors']) - - iterator = application(environ, start_response_wrapper) - assert_(iterator is not None and iterator != False, - "The application must return an iterator, if only an empty list") - - check_iterator(iterator) - - return IteratorWrapper(iterator, start_response_started) - - return lint_app - -class InputWrapper: - - def __init__(self, wsgi_input): - self.input = wsgi_input - - def read(self, *args): - assert_(len(args) <= 1) - v = self.input.read(*args) - assert_(type(v) is type("")) - return v - - def readline(self): - v = self.input.readline() - assert_(type(v) is type("")) - return v - - def readlines(self, *args): - assert_(len(args) <= 1) - lines = self.input.readlines(*args) - assert_(type(lines) is type([])) - for line in lines: - assert_(type(line) is type("")) - return lines - - def __iter__(self): - while 1: - line = self.readline() - if not line: - return - yield line - - def close(self): - assert_(0, "input.close() must not be called") - -class ErrorWrapper: - - def __init__(self, wsgi_errors): - self.errors = wsgi_errors - - 
def write(self, s): - assert_(type(s) is type("")) - self.errors.write(s) - - def flush(self): - self.errors.flush() - - def writelines(self, seq): - for line in seq: - self.write(line) - - def close(self): - assert_(0, "errors.close() must not be called") - -class WriteWrapper: - - def __init__(self, wsgi_writer): - self.writer = wsgi_writer - - def __call__(self, s): - assert_(type(s) is type("")) - self.writer(s) - -class PartialIteratorWrapper: - - def __init__(self, wsgi_iterator): - self.iterator = wsgi_iterator - - def __iter__(self): - # We want to make sure __iter__ is called - return IteratorWrapper(self.iterator, None) - -class IteratorWrapper: - - def __init__(self, wsgi_iterator, check_start_response): - self.original_iterator = wsgi_iterator - self.iterator = iter(wsgi_iterator) - self.closed = False - self.check_start_response = check_start_response - - def __iter__(self): - return self - - def next(self): - assert_(not self.closed, - "Iterator read after closed") - v = self.iterator.next() - if self.check_start_response is not None: - assert_(self.check_start_response, - "The application returns and we started iterating over its body, but start_response has not yet been called") - self.check_start_response = None - return v - - def close(self): - self.closed = True - if hasattr(self.original_iterator, 'close'): - self.original_iterator.close() - - def __del__(self): - if not self.closed: - sys.stderr.write( - "Iterator garbage collected without being closed") - assert_(self.closed, - "Iterator garbage collected without being closed") - -def check_environ(environ): - assert_(type(environ) is DictType, - "Environment is not of the right type: %r (environment: %r)" - % (type(environ), environ)) - - for key in ['REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT', - 'wsgi.version', 'wsgi.input', 'wsgi.errors', - 'wsgi.multithread', 'wsgi.multiprocess', - 'wsgi.run_once']: - assert_(key in environ, - "Environment missing required key: %r" % (key,)) - - for key 
in ['HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH']: - assert_(key not in environ, - "Environment should not have the key: %s " - "(use %s instead)" % (key, key[5:])) - - if 'QUERY_STRING' not in environ: - warnings.warn( - 'QUERY_STRING is not in the WSGI environment; the cgi ' - 'module will use sys.argv when this variable is missing, ' - 'so application errors are more likely', - WSGIWarning) - - for key in environ.keys(): - if '.' in key: - # Extension, we don't care about its type - continue - assert_(type(environ[key]) is StringType, - "Environmental variable %s is not a string: %r (value: %r)" - % (key, type(environ[key]), environ[key])) - - assert_(type(environ['wsgi.version']) is TupleType, - "wsgi.version should be a tuple (%r)" % (environ['wsgi.version'],)) - assert_(environ['wsgi.url_scheme'] in ('http', 'https'), - "wsgi.url_scheme unknown: %r" % environ['wsgi.url_scheme']) - - check_input(environ['wsgi.input']) - check_errors(environ['wsgi.errors']) - - # @@: these need filling out: - if environ['REQUEST_METHOD'] not in ( - 'GET', 'HEAD', 'POST', 'OPTIONS', 'PATCH', 'PUT', 'DELETE', 'TRACE'): - warnings.warn( - "Unknown REQUEST_METHOD: %r" % environ['REQUEST_METHOD'], - WSGIWarning) - - assert_(not environ.get('SCRIPT_NAME') - or environ['SCRIPT_NAME'].startswith('/'), - "SCRIPT_NAME doesn't start with /: %r" % environ['SCRIPT_NAME']) - assert_(not environ.get('PATH_INFO') - or environ['PATH_INFO'].startswith('/'), - "PATH_INFO doesn't start with /: %r" % environ['PATH_INFO']) - if environ.get('CONTENT_LENGTH'): - assert_(int(environ['CONTENT_LENGTH']) >= 0, - "Invalid CONTENT_LENGTH: %r" % environ['CONTENT_LENGTH']) - - if not environ.get('SCRIPT_NAME'): - assert_('PATH_INFO' in environ, - "One of SCRIPT_NAME or PATH_INFO are required (PATH_INFO " - "should at least be '/' if SCRIPT_NAME is empty)") - assert_(environ.get('SCRIPT_NAME') != '/', - "SCRIPT_NAME cannot be '/'; it should instead be '', and " - "PATH_INFO should be '/'") - -def 
check_input(wsgi_input): - for attr in ['read', 'readline', 'readlines', '__iter__']: - assert_(hasattr(wsgi_input, attr), - "wsgi.input (%r) doesn't have the attribute %s" - % (wsgi_input, attr)) - -def check_errors(wsgi_errors): - for attr in ['flush', 'write', 'writelines']: - assert_(hasattr(wsgi_errors, attr), - "wsgi.errors (%r) doesn't have the attribute %s" - % (wsgi_errors, attr)) - -def check_status(status): - assert_(type(status) is StringType, - "Status must be a string (not %r)" % status) - # Implicitly check that we can turn it into an integer: - status_code = status.split(None, 1)[0] - assert_(len(status_code) == 3, - "Status codes must be three characters: %r" % status_code) - status_int = int(status_code) - assert_(status_int >= 100, "Status code is invalid: %r" % status_int) - if len(status) < 4 or status[3] != ' ': - warnings.warn( - "The status string (%r) should be a three-digit integer " - "followed by a single space and a status explanation" - % status, WSGIWarning) - -def check_headers(headers): - assert_(type(headers) is ListType, - "Headers (%r) must be of type list: %r" - % (headers, type(headers))) - header_names = {} - for item in headers: - assert_(type(item) is TupleType, - "Individual headers (%r) must be of type tuple: %r" - % (item, type(item))) - assert_(len(item) == 2) - name, value = item - assert_(name.lower() != 'status', - "The Status header cannot be used; it conflicts with CGI " - "script, and HTTP status is not given through headers " - "(value: %r)." 
% value) - header_names[name.lower()] = None - assert_('\n' not in name and ':' not in name, - "Header names may not contain ':' or '\\n': %r" % name) - assert_(header_re.search(name), "Bad header name: %r" % name) - assert_(not name.endswith('-') and not name.endswith('_'), - "Names may not end in '-' or '_': %r" % name) - if bad_header_value_re.search(value): - assert_(0, "Bad header value: %r (bad char: %r)" - % (value, bad_header_value_re.search(value).group(0))) - -def check_content_type(status, headers): - code = int(status.split(None, 1)[0]) - # @@: need one more person to verify this interpretation of RFC 2616 - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html - NO_MESSAGE_BODY = (204, 304) - for name, value in headers: - if name.lower() == 'content-type': - if code not in NO_MESSAGE_BODY: - return - assert_(0, ("Content-Type header found in a %s response, " - "which must not return content.") % code) - if code not in NO_MESSAGE_BODY: - assert_(0, "No Content-Type header found in headers (%s)" % headers) - -def check_exc_info(exc_info): - assert_(exc_info is None or type(exc_info) is type(()), - "exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info))) - # More exc_info checks? - -def check_iterator(iterator): - # Technically a string is legal, which is why it's a really bad - # idea, because it may cause the response to be returned - # character-by-character - assert_(not isinstance(iterator, str), - "You should not return a string as your application iterator, " - "instead return a single-item list containing that string.") diff --git a/python/Lib/xdrlib.py b/python/Lib/xdrlib.py deleted file mode 100755 index 7afba497ae..0000000000 --- a/python/Lib/xdrlib.py +++ /dev/null @@ -1,248 +0,0 @@ -"""Implements (a subset of) Sun XDR -- eXternal Data Representation. 
- -See: RFC 1014 - -""" - -import struct -try: - from cStringIO import StringIO as _StringIO -except ImportError: - from StringIO import StringIO as _StringIO -from functools import wraps - -__all__ = ["Error", "Packer", "Unpacker", "ConversionError"] - -# exceptions -class Error(Exception): - """Exception class for this module. Use: - - except xdrlib.Error, var: - # var has the Error instance for the exception - - Public ivars: - msg -- contains the message - - """ - def __init__(self, msg): - self.msg = msg - def __repr__(self): - return repr(self.msg) - def __str__(self): - return str(self.msg) - - -class ConversionError(Error): - pass - -def raise_conversion_error(function): - """ Wrap any raised struct.errors in a ConversionError. """ - - @wraps(function) - def result(self, value): - try: - return function(self, value) - except struct.error as e: - raise ConversionError(e.args[0]) - return result - - -class Packer: - """Pack various data representations into a buffer.""" - - def __init__(self): - self.reset() - - def reset(self): - self.__buf = _StringIO() - - def get_buffer(self): - return self.__buf.getvalue() - # backwards compatibility - get_buf = get_buffer - - @raise_conversion_error - def pack_uint(self, x): - self.__buf.write(struct.pack('>L', x)) - - @raise_conversion_error - def pack_int(self, x): - self.__buf.write(struct.pack('>l', x)) - - pack_enum = pack_int - - def pack_bool(self, x): - if x: self.__buf.write('\0\0\0\1') - else: self.__buf.write('\0\0\0\0') - - def pack_uhyper(self, x): - try: - self.pack_uint(x>>32 & 0xffffffffL) - except (TypeError, struct.error) as e: - raise ConversionError(e.args[0]) - try: - self.pack_uint(x & 0xffffffffL) - except (TypeError, struct.error) as e: - raise ConversionError(e.args[0]) - - pack_hyper = pack_uhyper - - @raise_conversion_error - def pack_float(self, x): - self.__buf.write(struct.pack('>f', x)) - - @raise_conversion_error - def pack_double(self, x): - self.__buf.write(struct.pack('>d', x)) - - def 
pack_fstring(self, n, s): - if n < 0: - raise ValueError, 'fstring size must be nonnegative' - data = s[:n] - n = ((n+3)//4)*4 - data = data + (n - len(data)) * '\0' - self.__buf.write(data) - - pack_fopaque = pack_fstring - - def pack_string(self, s): - n = len(s) - self.pack_uint(n) - self.pack_fstring(n, s) - - pack_opaque = pack_string - pack_bytes = pack_string - - def pack_list(self, list, pack_item): - for item in list: - self.pack_uint(1) - pack_item(item) - self.pack_uint(0) - - def pack_farray(self, n, list, pack_item): - if len(list) != n: - raise ValueError, 'wrong array size' - for item in list: - pack_item(item) - - def pack_array(self, list, pack_item): - n = len(list) - self.pack_uint(n) - self.pack_farray(n, list, pack_item) - - - -class Unpacker: - """Unpacks various data representations from the given buffer.""" - - def __init__(self, data): - self.reset(data) - - def reset(self, data): - self.__buf = data - self.__pos = 0 - - def get_position(self): - return self.__pos - - def set_position(self, position): - self.__pos = position - - def get_buffer(self): - return self.__buf - - def done(self): - if self.__pos < len(self.__buf): - raise Error('unextracted data remains') - - def unpack_uint(self): - i = self.__pos - self.__pos = j = i+4 - data = self.__buf[i:j] - if len(data) < 4: - raise EOFError - x = struct.unpack('>L', data)[0] - try: - return int(x) - except OverflowError: - return x - - def unpack_int(self): - i = self.__pos - self.__pos = j = i+4 - data = self.__buf[i:j] - if len(data) < 4: - raise EOFError - return struct.unpack('>l', data)[0] - - unpack_enum = unpack_int - - def unpack_bool(self): - return bool(self.unpack_int()) - - def unpack_uhyper(self): - hi = self.unpack_uint() - lo = self.unpack_uint() - return long(hi)<<32 | lo - - def unpack_hyper(self): - x = self.unpack_uhyper() - if x >= 0x8000000000000000L: - x = x - 0x10000000000000000L - return x - - def unpack_float(self): - i = self.__pos - self.__pos = j = i+4 - data = 
self.__buf[i:j] - if len(data) < 4: - raise EOFError - return struct.unpack('>f', data)[0] - - def unpack_double(self): - i = self.__pos - self.__pos = j = i+8 - data = self.__buf[i:j] - if len(data) < 8: - raise EOFError - return struct.unpack('>d', data)[0] - - def unpack_fstring(self, n): - if n < 0: - raise ValueError, 'fstring size must be nonnegative' - i = self.__pos - j = i + (n+3)//4*4 - if j > len(self.__buf): - raise EOFError - self.__pos = j - return self.__buf[i:i+n] - - unpack_fopaque = unpack_fstring - - def unpack_string(self): - n = self.unpack_uint() - return self.unpack_fstring(n) - - unpack_opaque = unpack_string - unpack_bytes = unpack_string - - def unpack_list(self, unpack_item): - list = [] - while 1: - x = self.unpack_uint() - if x == 0: break - if x != 1: - raise ConversionError, '0 or 1 expected, got %r' % (x,) - item = unpack_item() - list.append(item) - return list - - def unpack_farray(self, n, unpack_item): - list = [] - for i in range(n): - list.append(unpack_item()) - return list - - def unpack_array(self, unpack_item): - n = self.unpack_uint() - return self.unpack_farray(n, unpack_item) diff --git a/python/Lib/xml/__init__.py b/python/Lib/xml/__init__.py deleted file mode 100755 index deed983d97..0000000000 --- a/python/Lib/xml/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Core XML support for Python. - -This package contains four sub-packages: - -dom -- The W3C Document Object Model. This supports DOM Level 1 + - Namespaces. - -parsers -- Python wrappers for XML parsers (currently only supports Expat). - -sax -- The Simple API for XML, developed by XML-Dev, led by David - Megginson and ported to Python by Lars Marius Garshol. This - supports the SAX 2 API. - -etree -- The ElementTree XML library. This is a subset of the full - ElementTree XML release. 
- -""" - - -__all__ = ["dom", "parsers", "sax", "etree"] - -_MINIMUM_XMLPLUS_VERSION = (0, 8, 4) - - -try: - import _xmlplus -except ImportError: - pass -else: - try: - v = _xmlplus.version_info - except AttributeError: - # _xmlplus is too old; ignore it - pass - else: - if v >= _MINIMUM_XMLPLUS_VERSION: - import sys - _xmlplus.__path__.extend(__path__) - sys.modules[__name__] = _xmlplus - else: - del v diff --git a/python/Lib/xml/dom/NodeFilter.py b/python/Lib/xml/dom/NodeFilter.py deleted file mode 100755 index fc052459da..0000000000 --- a/python/Lib/xml/dom/NodeFilter.py +++ /dev/null @@ -1,27 +0,0 @@ -# This is the Python mapping for interface NodeFilter from -# DOM2-Traversal-Range. It contains only constants. - -class NodeFilter: - """ - This is the DOM2 NodeFilter interface. It contains only constants. - """ - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - - SHOW_ALL = 0xFFFFFFFFL - SHOW_ELEMENT = 0x00000001 - SHOW_ATTRIBUTE = 0x00000002 - SHOW_TEXT = 0x00000004 - SHOW_CDATA_SECTION = 0x00000008 - SHOW_ENTITY_REFERENCE = 0x00000010 - SHOW_ENTITY = 0x00000020 - SHOW_PROCESSING_INSTRUCTION = 0x00000040 - SHOW_COMMENT = 0x00000080 - SHOW_DOCUMENT = 0x00000100 - SHOW_DOCUMENT_TYPE = 0x00000200 - SHOW_DOCUMENT_FRAGMENT = 0x00000400 - SHOW_NOTATION = 0x00000800 - - def acceptNode(self, node): - raise NotImplementedError diff --git a/python/Lib/xml/dom/__init__.py b/python/Lib/xml/dom/__init__.py deleted file mode 100755 index 6363d00630..0000000000 --- a/python/Lib/xml/dom/__init__.py +++ /dev/null @@ -1,139 +0,0 @@ -"""W3C Document Object Model implementation for Python. - -The Python mapping of the Document Object Model is documented in the -Python Library Reference in the section on the xml.dom package. - -This package contains the following modules: - -minidom -- A simple implementation of the Level 1 DOM with namespace - support added (based on the Level 2 specification) and other - minor Level 2 functionality. 
- -pulldom -- DOM builder supporting on-demand tree-building for selected - subtrees of the document. - -""" - - -class Node: - """Class giving the NodeType constants.""" - - # DOM implementations may use this as a base class for their own - # Node implementations. If they don't, the constants defined here - # should still be used as the canonical definitions as they match - # the values given in the W3C recommendation. Client code can - # safely refer to these values in all tests of Node.nodeType - # values. - - ELEMENT_NODE = 1 - ATTRIBUTE_NODE = 2 - TEXT_NODE = 3 - CDATA_SECTION_NODE = 4 - ENTITY_REFERENCE_NODE = 5 - ENTITY_NODE = 6 - PROCESSING_INSTRUCTION_NODE = 7 - COMMENT_NODE = 8 - DOCUMENT_NODE = 9 - DOCUMENT_TYPE_NODE = 10 - DOCUMENT_FRAGMENT_NODE = 11 - NOTATION_NODE = 12 - - -#ExceptionCode -INDEX_SIZE_ERR = 1 -DOMSTRING_SIZE_ERR = 2 -HIERARCHY_REQUEST_ERR = 3 -WRONG_DOCUMENT_ERR = 4 -INVALID_CHARACTER_ERR = 5 -NO_DATA_ALLOWED_ERR = 6 -NO_MODIFICATION_ALLOWED_ERR = 7 -NOT_FOUND_ERR = 8 -NOT_SUPPORTED_ERR = 9 -INUSE_ATTRIBUTE_ERR = 10 -INVALID_STATE_ERR = 11 -SYNTAX_ERR = 12 -INVALID_MODIFICATION_ERR = 13 -NAMESPACE_ERR = 14 -INVALID_ACCESS_ERR = 15 -VALIDATION_ERR = 16 - - -class DOMException(Exception): - """Abstract base class for DOM exceptions. 
- Exceptions with specific codes are specializations of this class.""" - - def __init__(self, *args, **kw): - if self.__class__ is DOMException: - raise RuntimeError( - "DOMException should not be instantiated directly") - Exception.__init__(self, *args, **kw) - - def _get_code(self): - return self.code - - -class IndexSizeErr(DOMException): - code = INDEX_SIZE_ERR - -class DomstringSizeErr(DOMException): - code = DOMSTRING_SIZE_ERR - -class HierarchyRequestErr(DOMException): - code = HIERARCHY_REQUEST_ERR - -class WrongDocumentErr(DOMException): - code = WRONG_DOCUMENT_ERR - -class InvalidCharacterErr(DOMException): - code = INVALID_CHARACTER_ERR - -class NoDataAllowedErr(DOMException): - code = NO_DATA_ALLOWED_ERR - -class NoModificationAllowedErr(DOMException): - code = NO_MODIFICATION_ALLOWED_ERR - -class NotFoundErr(DOMException): - code = NOT_FOUND_ERR - -class NotSupportedErr(DOMException): - code = NOT_SUPPORTED_ERR - -class InuseAttributeErr(DOMException): - code = INUSE_ATTRIBUTE_ERR - -class InvalidStateErr(DOMException): - code = INVALID_STATE_ERR - -class SyntaxErr(DOMException): - code = SYNTAX_ERR - -class InvalidModificationErr(DOMException): - code = INVALID_MODIFICATION_ERR - -class NamespaceErr(DOMException): - code = NAMESPACE_ERR - -class InvalidAccessErr(DOMException): - code = INVALID_ACCESS_ERR - -class ValidationErr(DOMException): - code = VALIDATION_ERR - -class UserDataHandler: - """Class giving the operation constants for UserDataHandler.handle().""" - - # Based on DOM Level 3 (WD 9 April 2002) - - NODE_CLONED = 1 - NODE_IMPORTED = 2 - NODE_DELETED = 3 - NODE_RENAMED = 4 - -XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" -XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" -XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" -EMPTY_NAMESPACE = None -EMPTY_PREFIX = None - -from domreg import getDOMImplementation,registerDOMImplementation diff --git a/python/Lib/xml/dom/domreg.py b/python/Lib/xml/dom/domreg.py deleted file mode 100755 
index ec3acdf9c1..0000000000 --- a/python/Lib/xml/dom/domreg.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Registration facilities for DOM. This module should not be used -directly. Instead, the functions getDOMImplementation and -registerDOMImplementation should be imported from xml.dom.""" - -from xml.dom.minicompat import * # isinstance, StringTypes - -# This is a list of well-known implementations. Well-known names -# should be published by posting to xml-sig@python.org, and are -# subsequently recorded in this file. - -well_known_implementations = { - 'minidom':'xml.dom.minidom', - '4DOM': 'xml.dom.DOMImplementation', - } - -# DOM implementations not officially registered should register -# themselves with their - -registered = {} - -def registerDOMImplementation(name, factory): - """registerDOMImplementation(name, factory) - - Register the factory function with the name. The factory function - should return an object which implements the DOMImplementation - interface. The factory function can either return the same object, - or a new one (e.g. if that implementation supports some - customization).""" - - registered[name] = factory - -def _good_enough(dom, features): - "_good_enough(dom, features) -> Return 1 if the dom offers the features" - for f,v in features: - if not dom.hasFeature(f,v): - return 0 - return 1 - -def getDOMImplementation(name = None, features = ()): - """getDOMImplementation(name = None, features = ()) -> DOM implementation. - - Return a suitable DOM implementation. The name is either - well-known, the module name of a DOM implementation, or None. If - it is not None, imports the corresponding module and returns - DOMImplementation object if the import succeeds. - - If name is not given, consider the available implementations to - find one with the required feature set. If no implementation can - be found, raise an ImportError. 
The features list must be a sequence - of (feature, version) pairs which are passed to hasFeature.""" - - import os - creator = None - mod = well_known_implementations.get(name) - if mod: - mod = __import__(mod, {}, {}, ['getDOMImplementation']) - return mod.getDOMImplementation() - elif name: - return registered[name]() - elif "PYTHON_DOM" in os.environ: - return getDOMImplementation(name = os.environ["PYTHON_DOM"]) - - # User did not specify a name, try implementations in arbitrary - # order, returning the one that has the required features - if isinstance(features, StringTypes): - features = _parse_feature_string(features) - for creator in registered.values(): - dom = creator() - if _good_enough(dom, features): - return dom - - for creator in well_known_implementations.keys(): - try: - dom = getDOMImplementation(name = creator) - except StandardError: # typically ImportError, or AttributeError - continue - if _good_enough(dom, features): - return dom - - raise ImportError,"no suitable DOM implementation found" - -def _parse_feature_string(s): - features = [] - parts = s.split() - i = 0 - length = len(parts) - while i < length: - feature = parts[i] - if feature[0] in "0123456789": - raise ValueError, "bad feature name: %r" % (feature,) - i = i + 1 - version = None - if i < length: - v = parts[i] - if v[0] in "0123456789": - i = i + 1 - version = v - features.append((feature, version)) - return tuple(features) diff --git a/python/Lib/xml/dom/expatbuilder.py b/python/Lib/xml/dom/expatbuilder.py deleted file mode 100755 index 45cd22ea03..0000000000 --- a/python/Lib/xml/dom/expatbuilder.py +++ /dev/null @@ -1,983 +0,0 @@ -"""Facility to use the Expat parser to load a minidom instance -from a string or file. - -This avoids all the overhead of SAX and pulldom to gain performance. -""" - -# Warning! -# -# This module is tightly bound to the implementation details of the -# minidom DOM and can't be used with other DOM implementations. 
This -# is due, in part, to a lack of appropriate methods in the DOM (there is -# no way to create Entity and Notation nodes via the DOM Level 2 -# interface), and for performance. The latter is the cause of some fairly -# cryptic code. -# -# Performance hacks: -# -# - .character_data_handler() has an extra case in which continuing -# data is appended to an existing Text node; this can be a -# speedup since pyexpat can break up character data into multiple -# callbacks even though we set the buffer_text attribute on the -# parser. This also gives us the advantage that we don't need a -# separate normalization pass. -# -# - Determining that a node exists is done using an identity comparison -# with None rather than a truth test; this avoids searching for and -# calling any methods on the node object if it exists. (A rather -# nice speedup is achieved this way as well!) - -from xml.dom import xmlbuilder, minidom, Node -from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE -from xml.parsers import expat -from xml.dom.minidom import _append_child, _set_attribute_node -from xml.dom.NodeFilter import NodeFilter - -from xml.dom.minicompat import * - -TEXT_NODE = Node.TEXT_NODE -CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE -DOCUMENT_NODE = Node.DOCUMENT_NODE - -FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT -FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT -FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP -FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT - -theDOMImplementation = minidom.getDOMImplementation() - -# Expat typename -> TypeInfo -_typeinfo_map = { - "CDATA": minidom.TypeInfo(None, "cdata"), - "ENUM": minidom.TypeInfo(None, "enumeration"), - "ENTITY": minidom.TypeInfo(None, "entity"), - "ENTITIES": minidom.TypeInfo(None, "entities"), - "ID": minidom.TypeInfo(None, "id"), - "IDREF": minidom.TypeInfo(None, "idref"), - "IDREFS": minidom.TypeInfo(None, "idrefs"), - "NMTOKEN": minidom.TypeInfo(None, "nmtoken"), - 
"NMTOKENS": minidom.TypeInfo(None, "nmtokens"), - } - -class ElementInfo(object): - __slots__ = '_attr_info', '_model', 'tagName' - - def __init__(self, tagName, model=None): - self.tagName = tagName - self._attr_info = [] - self._model = model - - def __getstate__(self): - return self._attr_info, self._model, self.tagName - - def __setstate__(self, state): - self._attr_info, self._model, self.tagName = state - - def getAttributeType(self, aname): - for info in self._attr_info: - if info[1] == aname: - t = info[-2] - if t[0] == "(": - return _typeinfo_map["ENUM"] - else: - return _typeinfo_map[info[-2]] - return minidom._no_type - - def getAttributeTypeNS(self, namespaceURI, localName): - return minidom._no_type - - def isElementContent(self): - if self._model: - type = self._model[0] - return type not in (expat.model.XML_CTYPE_ANY, - expat.model.XML_CTYPE_MIXED) - else: - return False - - def isEmpty(self): - if self._model: - return self._model[0] == expat.model.XML_CTYPE_EMPTY - else: - return False - - def isId(self, aname): - for info in self._attr_info: - if info[1] == aname: - return info[-2] == "ID" - return False - - def isIdNS(self, euri, ename, auri, aname): - # not sure this is meaningful - return self.isId((auri, aname)) - -def _intern(builder, s): - return builder._intern_setdefault(s, s) - -def _parse_ns_name(builder, name): - assert ' ' in name - parts = name.split(' ') - intern = builder._intern_setdefault - if len(parts) == 3: - uri, localname, prefix = parts - prefix = intern(prefix, prefix) - qname = "%s:%s" % (prefix, localname) - qname = intern(qname, qname) - localname = intern(localname, localname) - else: - uri, localname = parts - prefix = EMPTY_PREFIX - qname = localname = intern(localname, localname) - return intern(uri, uri), localname, prefix, qname - - -class ExpatBuilder: - """Document builder that uses Expat to build a ParsedXML.DOM document - instance.""" - - def __init__(self, options=None): - if options is None: - options = 
xmlbuilder.Options() - self._options = options - if self._options.filter is not None: - self._filter = FilterVisibilityController(self._options.filter) - else: - self._filter = None - # This *really* doesn't do anything in this case, so - # override it with something fast & minimal. - self._finish_start_element = id - self._parser = None - self.reset() - - def createParser(self): - """Create a new parser object.""" - return expat.ParserCreate() - - def getParser(self): - """Return the parser object, creating a new one if needed.""" - if not self._parser: - self._parser = self.createParser() - self._intern_setdefault = self._parser.intern.setdefault - self._parser.buffer_text = True - self._parser.ordered_attributes = True - self._parser.specified_attributes = True - self.install(self._parser) - return self._parser - - def reset(self): - """Free all data structures used during DOM construction.""" - self.document = theDOMImplementation.createDocument( - EMPTY_NAMESPACE, None, None) - self.curNode = self.document - self._elem_info = self.document._elem_info - self._cdata = False - - def install(self, parser): - """Install the callbacks needed to build the DOM into the parser.""" - # This creates circular references! 
- parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.first_element_handler - parser.EndElementHandler = self.end_element_handler - parser.ProcessingInstructionHandler = self.pi_handler - if self._options.entities: - parser.EntityDeclHandler = self.entity_decl_handler - parser.NotationDeclHandler = self.notation_decl_handler - if self._options.comments: - parser.CommentHandler = self.comment_handler - if self._options.cdata_sections: - parser.StartCdataSectionHandler = self.start_cdata_section_handler - parser.EndCdataSectionHandler = self.end_cdata_section_handler - parser.CharacterDataHandler = self.character_data_handler_cdata - else: - parser.CharacterDataHandler = self.character_data_handler - parser.ExternalEntityRefHandler = self.external_entity_ref_handler - parser.XmlDeclHandler = self.xml_decl_handler - parser.ElementDeclHandler = self.element_decl_handler - parser.AttlistDeclHandler = self.attlist_decl_handler - - def parseFile(self, file): - """Parse a document from a file object, returning the document - node.""" - parser = self.getParser() - first_buffer = True - try: - while 1: - buffer = file.read(16*1024) - if not buffer: - break - parser.Parse(buffer, 0) - if first_buffer and self.document.documentElement: - self._setup_subset(buffer) - first_buffer = False - parser.Parse("", True) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def parseString(self, string): - """Parse a document from a string, returning the document node.""" - parser = self.getParser() - try: - parser.Parse(string, True) - self._setup_subset(string) - except ParseEscape: - pass - doc = self.document - self.reset() - self._parser = None - return doc - - def _setup_subset(self, buffer): - """Load the internal subset if there might be one.""" - if self.document.doctype: - extractor = InternalSubsetExtractor() - extractor.parseString(buffer) - subset = extractor.getSubset() - 
self.document.doctype.internalSubset = subset - - def start_doctype_decl_handler(self, doctypeName, systemId, publicId, - has_internal_subset): - doctype = self.document.implementation.createDocumentType( - doctypeName, publicId, systemId) - doctype.ownerDocument = self.document - _append_child(self.document, doctype) - self.document.doctype = doctype - if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: - self.document.doctype = None - del self.document.childNodes[-1] - doctype = None - self._parser.EntityDeclHandler = None - self._parser.NotationDeclHandler = None - if has_internal_subset: - if doctype is not None: - doctype.entities._seq = [] - doctype.notations._seq = [] - self._parser.CommentHandler = None - self._parser.ProcessingInstructionHandler = None - self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - - def end_doctype_decl_handler(self): - if self._options.comments: - self._parser.CommentHandler = self.comment_handler - self._parser.ProcessingInstructionHandler = self.pi_handler - if not (self._elem_info or self._filter): - self._finish_end_element = id - - def pi_handler(self, target, data): - node = self.document.createProcessingInstruction(target, data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def character_data_handler_cdata(self, data): - childNodes = self.curNode.childNodes - if self._cdata: - if ( self._cdata_continue - and childNodes[-1].nodeType == CDATA_SECTION_NODE): - childNodes[-1].appendData(data) - return - node = self.document.createCDATASection(data) - self._cdata_continue = True - elif childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - value = node.data + data - d = node.__dict__ - d['data'] = d['nodeValue'] = value - return - else: - node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = data - d['ownerDocument'] = self.document - 
_append_child(self.curNode, node) - - def character_data_handler(self, data): - childNodes = self.curNode.childNodes - if childNodes and childNodes[-1].nodeType == TEXT_NODE: - node = childNodes[-1] - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data - return - node = minidom.Text() - d = node.__dict__ - d['data'] = d['nodeValue'] = node.data + data - d['ownerDocument'] = self.document - _append_child(self.curNode, node) - - def entity_decl_handler(self, entityName, is_parameter_entity, value, - base, systemId, publicId, notationName): - if is_parameter_entity: - # we don't care about parameter entities for the DOM - return - if not self._options.entities: - return - node = self.document._create_entity(entityName, publicId, - systemId, notationName) - if value is not None: - # internal entity - # node *should* be readonly, but we'll cheat - child = self.document.createTextNode(value) - node.childNodes.append(child) - self.document.doctype.entities._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - del self.document.doctype.entities._seq[-1] - - def notation_decl_handler(self, notationName, base, systemId, publicId): - node = self.document._create_notation(notationName, publicId, systemId) - self.document.doctype.notations._seq.append(node) - if self._filter and self._filter.acceptNode(node) == FILTER_ACCEPT: - del self.document.doctype.notations._seq[-1] - - def comment_handler(self, data): - node = self.document.createComment(data) - _append_child(self.curNode, node) - if self._filter and self._filter.acceptNode(node) == FILTER_REJECT: - self.curNode.removeChild(node) - - def start_cdata_section_handler(self): - self._cdata = True - self._cdata_continue = False - - def end_cdata_section_handler(self): - self._cdata = False - self._cdata_continue = False - - def external_entity_ref_handler(self, context, base, systemId, publicId): - return 1 - - def first_element_handler(self, name, attributes): - if 
self._filter is None and not self._elem_info: - self._finish_end_element = id - self.getParser().StartElementHandler = self.start_element_handler - self.start_element_handler(name, attributes) - - def start_element_handler(self, name, attributes): - node = self.document.createElement(name) - _append_child(self.curNode, node) - self.curNode = node - - if attributes: - for i in range(0, len(attributes), 2): - a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, - None, EMPTY_PREFIX) - value = attributes[i+1] - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['value'] = d['nodeValue'] = value - d['ownerDocument'] = self.document - _set_attribute_node(node, a) - - if node is not self.document.documentElement: - self._finish_start_element(node) - - def _finish_start_element(self, node): - if self._filter: - # To be general, we'd have to call isSameNode(), but this - # is sufficient for minidom: - if node is self.document.documentElement: - return - filt = self._filter.startContainer(node) - if filt == FILTER_REJECT: - # ignore this node & all descendents - Rejecter(self) - elif filt == FILTER_SKIP: - # ignore this node, but make it's children become - # children of the parent node - Skipper(self) - else: - return - self.curNode = node.parentNode - node.parentNode.removeChild(node) - node.unlink() - - # If this ever changes, Namespaces.end_element_handler() needs to - # be changed to match. 
- # - def end_element_handler(self, name): - curNode = self.curNode - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - def _finish_end_element(self, curNode): - info = self._elem_info.get(curNode.tagName) - if info: - self._handle_white_text_nodes(curNode, info) - if self._filter: - if curNode is self.document.documentElement: - return - if self._filter.acceptNode(curNode) == FILTER_REJECT: - self.curNode.removeChild(curNode) - curNode.unlink() - - def _handle_white_text_nodes(self, node, info): - if (self._options.whitespace_in_element_content - or not info.isElementContent()): - return - - # We have element type information and should remove ignorable - # whitespace; identify for text nodes which contain only - # whitespace. - L = [] - for child in node.childNodes: - if child.nodeType == TEXT_NODE and not child.data.strip(): - L.append(child) - - # Remove ignorable whitespace from the tree. - for child in L: - node.removeChild(child) - - def element_decl_handler(self, name, model): - info = self._elem_info.get(name) - if info is None: - self._elem_info[name] = ElementInfo(name, model) - else: - assert info._model is None - info._model = model - - def attlist_decl_handler(self, elem, name, type, default, required): - info = self._elem_info.get(elem) - if info is None: - info = ElementInfo(elem) - self._elem_info[elem] = info - info._attr_info.append( - [None, name, None, None, default, 0, type, required]) - - def xml_decl_handler(self, version, encoding, standalone): - self.document.version = version - self.document.encoding = encoding - # This is still a little ugly, thanks to the pyexpat API. ;-( - if standalone >= 0: - if standalone: - self.document.standalone = True - else: - self.document.standalone = False - - -# Don't include FILTER_INTERRUPT, since that's checked separately -# where allowed. 
-_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) - -class FilterVisibilityController(object): - """Wrapper around a DOMBuilderFilter which implements the checks - to make the whatToShow filter attribute work.""" - - __slots__ = 'filter', - - def __init__(self, filter): - self.filter = filter - - def startContainer(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.startContainer(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError, \ - "startContainer() returned illegal value: " + repr(val) - return val - else: - return FILTER_ACCEPT - - def acceptNode(self, node): - mask = self._nodetype_mask[node.nodeType] - if self.filter.whatToShow & mask: - val = self.filter.acceptNode(node) - if val == FILTER_INTERRUPT: - raise ParseEscape - if val == FILTER_SKIP: - # move all child nodes to the parent, and remove this node - parent = node.parentNode - for child in node.childNodes[:]: - parent.appendChild(child) - # node is handled by the caller - return FILTER_REJECT - if val not in _ALLOWED_FILTER_RETURNS: - raise ValueError, \ - "acceptNode() returned illegal value: " + repr(val) - return val - else: - return FILTER_ACCEPT - - _nodetype_mask = { - Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, - Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, - Node.TEXT_NODE: NodeFilter.SHOW_TEXT, - Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, - Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, - Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, - Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, - Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, - Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, - Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, - Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, - Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, - } - - -class FilterCrutch(object): - __slots__ = 
'_builder', '_level', '_old_start', '_old_end' - - def __init__(self, builder): - self._level = 0 - self._builder = builder - parser = builder._parser - self._old_start = parser.StartElementHandler - self._old_end = parser.EndElementHandler - parser.StartElementHandler = self.start_element_handler - parser.EndElementHandler = self.end_element_handler - -class Rejecter(FilterCrutch): - __slots__ = () - - def __init__(self, builder): - FilterCrutch.__init__(self, builder) - parser = builder._parser - for name in ("ProcessingInstructionHandler", - "CommentHandler", - "CharacterDataHandler", - "StartCdataSectionHandler", - "EndCdataSectionHandler", - "ExternalEntityRefHandler", - ): - setattr(parser, name, None) - - def start_element_handler(self, *args): - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # restore the old handlers - parser = self._builder._parser - self._builder.install(parser) - parser.StartElementHandler = self._old_start - parser.EndElementHandler = self._old_end - else: - self._level = self._level - 1 - -class Skipper(FilterCrutch): - __slots__ = () - - def start_element_handler(self, *args): - node = self._builder.curNode - self._old_start(*args) - if self._builder.curNode is not node: - self._level = self._level + 1 - - def end_element_handler(self, *args): - if self._level == 0: - # We're popping back out of the node we're skipping, so we - # shouldn't need to do anything but reset the handlers. - self._builder._parser.StartElementHandler = self._old_start - self._builder._parser.EndElementHandler = self._old_end - self._builder = None - else: - self._level = self._level - 1 - self._old_end(*args) - - -# framework document used by the fragment builder. -# Takes a string for the doctype, subset string, and namespace attrs string. 
- -_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \ - "http://xml.python.org/entities/fragment-builder/internal" - -_FRAGMENT_BUILDER_TEMPLATE = ( - '''\ - -%%s -]> -&fragment-builder-internal;''' - % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID) - - -class FragmentBuilder(ExpatBuilder): - """Builder which constructs document fragments given XML source - text and a context node. - - The context node is expected to provide information about the - namespace declarations which are in scope at the start of the - fragment. - """ - - def __init__(self, context, options=None): - if context.nodeType == DOCUMENT_NODE: - self.originalDocument = context - self.context = context - else: - self.originalDocument = context.ownerDocument - self.context = context - ExpatBuilder.__init__(self, options) - - def reset(self): - ExpatBuilder.reset(self) - self.fragment = None - - def parseFile(self, file): - """Parse a document fragment from a file object, returning the - fragment node.""" - return self.parseString(file.read()) - - def parseString(self, string): - """Parse a document fragment from a string, returning the - fragment node.""" - self._source = string - parser = self.getParser() - doctype = self.originalDocument.doctype - ident = "" - if doctype: - subset = doctype.internalSubset or self._getDeclarations() - if doctype.publicId: - ident = ('PUBLIC "%s" "%s"' - % (doctype.publicId, doctype.systemId)) - elif doctype.systemId: - ident = 'SYSTEM "%s"' % doctype.systemId - else: - subset = "" - nsattrs = self._getNSattrs() # get ns decls from node's ancestors - document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs) - try: - parser.Parse(document, 1) - except: - self.reset() - raise - fragment = self.fragment - self.reset() -## self._parser = None - return fragment - - def _getDeclarations(self): - """Re-create the internal subset from the DocumentType node. - - This is only needed if we don't already have the - internalSubset as a string. 
- """ - doctype = self.context.ownerDocument.doctype - s = "" - if doctype: - for i in range(doctype.notations.length): - notation = doctype.notations.item(i) - if s: - s = s + "\n " - s = "%s' \ - % (s, notation.publicId, notation.systemId) - else: - s = '%s SYSTEM "%s">' % (s, notation.systemId) - for i in range(doctype.entities.length): - entity = doctype.entities.item(i) - if s: - s = s + "\n " - s = "%s" - return s - - def _getNSattrs(self): - return "" - - def external_entity_ref_handler(self, context, base, systemId, publicId): - if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: - # this entref is the one that we made to put the subtree - # in; all of our given input is parsed in here. - old_document = self.document - old_cur_node = self.curNode - parser = self._parser.ExternalEntityParserCreate(context) - # put the real document back, parse into the fragment to return - self.document = self.originalDocument - self.fragment = self.document.createDocumentFragment() - self.curNode = self.fragment - try: - parser.Parse(self._source, 1) - finally: - self.curNode = old_cur_node - self.document = old_document - self._source = None - return -1 - else: - return ExpatBuilder.external_entity_ref_handler( - self, context, base, systemId, publicId) - - -class Namespaces: - """Mix-in class for builders; adds support for namespaces.""" - - def _initNamespaces(self): - # list of (prefix, uri) ns declarations. Namespace attrs are - # constructed from this and added to the element's attrs. 
- self._ns_ordered_prefixes = [] - - def createParser(self): - """Create a new namespace-handling parser.""" - parser = expat.ParserCreate(namespace_separator=" ") - parser.namespace_prefixes = True - return parser - - def install(self, parser): - """Insert the namespace-handlers onto the parser.""" - ExpatBuilder.install(self, parser) - if self._options.namespace_declarations: - parser.StartNamespaceDeclHandler = ( - self.start_namespace_decl_handler) - - def start_namespace_decl_handler(self, prefix, uri): - """Push this namespace declaration on our storage.""" - self._ns_ordered_prefixes.append((prefix, uri)) - - def start_element_handler(self, name, attributes): - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - else: - uri = EMPTY_NAMESPACE - qname = name - localname = None - prefix = EMPTY_PREFIX - node = minidom.Element(qname, uri, prefix, localname) - node.ownerDocument = self.document - _append_child(self.curNode, node) - self.curNode = node - - if self._ns_ordered_prefixes: - for prefix, uri in self._ns_ordered_prefixes: - if prefix: - a = minidom.Attr(_intern(self, 'xmlns:' + prefix), - XMLNS_NAMESPACE, prefix, "xmlns") - else: - a = minidom.Attr("xmlns", XMLNS_NAMESPACE, - "xmlns", EMPTY_PREFIX) - d = a.childNodes[0].__dict__ - d['data'] = d['nodeValue'] = uri - d = a.__dict__ - d['value'] = d['nodeValue'] = uri - d['ownerDocument'] = self.document - _set_attribute_node(node, a) - del self._ns_ordered_prefixes[:] - - if attributes: - _attrs = node._attrs - _attrsNS = node._attrsNS - for i in range(0, len(attributes), 2): - aname = attributes[i] - value = attributes[i+1] - if ' ' in aname: - uri, localname, prefix, qname = _parse_ns_name(self, aname) - a = minidom.Attr(qname, uri, localname, prefix) - _attrs[qname] = a - _attrsNS[(uri, localname)] = a - else: - a = minidom.Attr(aname, EMPTY_NAMESPACE, - aname, EMPTY_PREFIX) - _attrs[aname] = a - _attrsNS[(EMPTY_NAMESPACE, aname)] = a - d = a.childNodes[0].__dict__ - 
d['data'] = d['nodeValue'] = value - d = a.__dict__ - d['ownerDocument'] = self.document - d['value'] = d['nodeValue'] = value - d['ownerElement'] = node - - if __debug__: - # This only adds some asserts to the original - # end_element_handler(), so we only define this when -O is not - # used. If changing one, be sure to check the other to see if - # it needs to be changed as well. - # - def end_element_handler(self, name): - curNode = self.curNode - if ' ' in name: - uri, localname, prefix, qname = _parse_ns_name(self, name) - assert (curNode.namespaceURI == uri - and curNode.localName == localname - and curNode.prefix == prefix), \ - "element stack messed up! (namespace)" - else: - assert curNode.nodeName == name, \ - "element stack messed up - bad nodeName" - assert curNode.namespaceURI == EMPTY_NAMESPACE, \ - "element stack messed up - bad namespaceURI" - self.curNode = curNode.parentNode - self._finish_end_element(curNode) - - -class ExpatBuilderNS(Namespaces, ExpatBuilder): - """Document builder that supports namespaces.""" - - def reset(self): - ExpatBuilder.reset(self) - self._initNamespaces() - - -class FragmentBuilderNS(Namespaces, FragmentBuilder): - """Fragment builder that supports namespaces.""" - - def reset(self): - FragmentBuilder.reset(self) - self._initNamespaces() - - def _getNSattrs(self): - """Return string of namespace attributes from this element and - ancestors.""" - # XXX This needs to be re-written to walk the ancestors of the - # context to build up the namespace information from - # declarations, elements, and attributes found in context. - # Otherwise we have to store a bunch more data on the DOM - # (though that *might* be more reliable -- not clear). 
- attrs = "" - context = self.context - L = [] - while context: - if hasattr(context, '_ns_prefix_uri'): - for prefix, uri in context._ns_prefix_uri.items(): - # add every new NS decl from context to L and attrs string - if prefix in L: - continue - L.append(prefix) - if prefix: - declname = "xmlns:" + prefix - else: - declname = "xmlns" - if attrs: - attrs = "%s\n %s='%s'" % (attrs, declname, uri) - else: - attrs = " %s='%s'" % (declname, uri) - context = context.parentNode - return attrs - - -class ParseEscape(Exception): - """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" - pass - -class InternalSubsetExtractor(ExpatBuilder): - """XML processor which can rip out the internal document type subset.""" - - subset = None - - def getSubset(self): - """Return the internal subset as a string.""" - return self.subset - - def parseFile(self, file): - try: - ExpatBuilder.parseFile(self, file) - except ParseEscape: - pass - - def parseString(self, string): - try: - ExpatBuilder.parseString(self, string) - except ParseEscape: - pass - - def install(self, parser): - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler - parser.StartElementHandler = self.start_element_handler - - def start_doctype_decl_handler(self, name, publicId, systemId, - has_internal_subset): - if has_internal_subset: - parser = self.getParser() - self.subset = [] - parser.DefaultHandler = self.subset.append - parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler - else: - raise ParseEscape() - - def end_doctype_decl_handler(self): - s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') - self.subset = s - raise ParseEscape() - - def start_element_handler(self, name, attrs): - raise ParseEscape() - - -def parse(file, namespaces=True): - """Parse a document, returning the resulting Document node. - - 'file' may be either a file name or an open file object. 
- """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - - if isinstance(file, StringTypes): - fp = open(file, 'rb') - try: - result = builder.parseFile(fp) - finally: - fp.close() - else: - result = builder.parseFile(file) - return result - - -def parseString(string, namespaces=True): - """Parse a document from a string, returning the resulting - Document node. - """ - if namespaces: - builder = ExpatBuilderNS() - else: - builder = ExpatBuilder() - return builder.parseString(string) - - -def parseFragment(file, context, namespaces=True): - """Parse a fragment of a document, given the context from which it - was originally extracted. context should be the parent of the - node(s) which are in the fragment. - - 'file' may be either a file name or an open file object. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - - if isinstance(file, StringTypes): - fp = open(file, 'rb') - try: - result = builder.parseFile(fp) - finally: - fp.close() - else: - result = builder.parseFile(file) - return result - - -def parseFragmentString(string, context, namespaces=True): - """Parse a fragment of a document from a string, given the context - from which it was originally extracted. context should be the - parent of the node(s) which are in the fragment. - """ - if namespaces: - builder = FragmentBuilderNS(context) - else: - builder = FragmentBuilder(context) - return builder.parseString(string) - - -def makeBuilder(options): - """Create a builder based on an Options object.""" - if options.namespaces: - return ExpatBuilderNS(options) - else: - return ExpatBuilder(options) diff --git a/python/Lib/xml/dom/minicompat.py b/python/Lib/xml/dom/minicompat.py deleted file mode 100755 index 266a7f43fe..0000000000 --- a/python/Lib/xml/dom/minicompat.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Python version compatibility support for minidom.""" - -# This module should only be imported using "import *". 
-# -# The following names are defined: -# -# NodeList -- lightest possible NodeList implementation -# -# EmptyNodeList -- lightest possible NodeList that is guaranteed to -# remain empty (immutable) -# -# StringTypes -- tuple of defined string types -# -# defproperty -- function used in conjunction with GetattrMagic; -# using these together is needed to make them work -# as efficiently as possible in both Python 2.2+ -# and older versions. For example: -# -# class MyClass(GetattrMagic): -# def _get_myattr(self): -# return something -# -# defproperty(MyClass, "myattr", -# "return some value") -# -# For Python 2.2 and newer, this will construct a -# property object on the class, which avoids -# needing to override __getattr__(). It will only -# work for read-only attributes. -# -# For older versions of Python, inheriting from -# GetattrMagic will use the traditional -# __getattr__() hackery to achieve the same effect, -# but less efficiently. -# -# defproperty() should be used for each version of -# the relevant _get_() function. 
- -__all__ = ["NodeList", "EmptyNodeList", "StringTypes", "defproperty"] - -import xml.dom - -try: - unicode -except NameError: - StringTypes = type(''), -else: - StringTypes = type(''), type(unicode('')) - - -class NodeList(list): - __slots__ = () - - def item(self, index): - if 0 <= index < len(self): - return self[index] - - def _get_length(self): - return len(self) - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - - # For backward compatibility - def __setstate__(self, state): - if state is None: - state = [] - self[:] = state - - -class EmptyNodeList(tuple): - __slots__ = () - - def __add__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def __radd__(self, other): - NL = NodeList() - NL.extend(other) - return NL - - def item(self, index): - return None - - def _get_length(self): - return 0 - - def _set_length(self, value): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute 'length'") - - length = property(_get_length, _set_length, - doc="The number of nodes in the NodeList.") - - -def defproperty(klass, name, doc): - get = getattr(klass, ("_get_" + name)).im_func - def set(self, value, name=name): - raise xml.dom.NoModificationAllowedErr( - "attempt to modify read-only attribute " + repr(name)) - assert not hasattr(klass, "_set_" + name), \ - "expected not to find _set_" + name - prop = property(get, set, doc=doc) - setattr(klass, name, prop) diff --git a/python/Lib/xml/dom/minidom.py b/python/Lib/xml/dom/minidom.py deleted file mode 100755 index c30e2462ee..0000000000 --- a/python/Lib/xml/dom/minidom.py +++ /dev/null @@ -1,1941 +0,0 @@ -"""Simple implementation of the Level 1 DOM. - -Namespaces and other minor Level 2 features are also supported. 
- -parse("foo.xml") - -parseString("") - -Todo: -===== - * convenience methods for getting elements and text. - * more testing - * bring some of the writer and linearizer code into conformance with this - interface - * SAX 2 namespaces -""" - -import xml.dom - -from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg -from xml.dom.minicompat import * -from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS - -# This is used by the ID-cache invalidation checks; the list isn't -# actually complete, since the nodes being checked will never be the -# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is -# the node being added or removed, not the node being modified.) -# -_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, - xml.dom.Node.ENTITY_REFERENCE_NODE) - - -class Node(xml.dom.Node): - namespaceURI = None # this is non-null only for elements and attributes - parentNode = None - ownerDocument = None - nextSibling = None - previousSibling = None - - prefix = EMPTY_PREFIX # non-null only for NS elements and attributes - - def __nonzero__(self): - return True - - def toxml(self, encoding = None): - return self.toprettyxml("", "", encoding) - - def toprettyxml(self, indent="\t", newl="\n", encoding = None): - # indent = the indentation string to prepend, per level - # newl = the newline string to append - writer = _get_StringIO() - if encoding is not None: - import codecs - # Can't use codecs.getwriter to preserve 2.0 compatibility - writer = codecs.lookup(encoding)[3](writer) - if self.nodeType == Node.DOCUMENT_NODE: - # Can pass encoding only to document, to put it into XML header - self.writexml(writer, "", indent, newl, encoding) - else: - self.writexml(writer, "", indent, newl) - return writer.getvalue() - - def hasChildNodes(self): - if self.childNodes: - return True - else: - return False - - def _get_childNodes(self): - return self.childNodes - - def _get_firstChild(self): - if self.childNodes: - return 
self.childNodes[0] - - def _get_lastChild(self): - if self.childNodes: - return self.childNodes[-1] - - def insertBefore(self, newChild, refChild): - if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(newChild.childNodes): - self.insertBefore(c, refChild) - ### The DOM does not clearly specify what to return in this case - return newChild - if newChild.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(newChild), repr(self))) - if newChild.parentNode is not None: - newChild.parentNode.removeChild(newChild) - if refChild is None: - self.appendChild(newChild) - else: - try: - index = self.childNodes.index(refChild) - except ValueError: - raise xml.dom.NotFoundErr() - if newChild.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - self.childNodes.insert(index, newChild) - newChild.nextSibling = refChild - refChild.previousSibling = newChild - if index: - node = self.childNodes[index-1] - node.nextSibling = newChild - newChild.previousSibling = node - else: - newChild.previousSibling = None - newChild.parentNode = self - return newChild - - def appendChild(self, node): - if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(node.childNodes): - self.appendChild(c) - ### The DOM does not clearly specify what to return in this case - return node - if node.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - elif node.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - if node.parentNode is not None: - node.parentNode.removeChild(node) - _append_child(self, node) - node.nextSibling = None - return node - - def replaceChild(self, newChild, oldChild): - if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: - refChild = oldChild.nextSibling - self.removeChild(oldChild) - return self.insertBefore(newChild, refChild) - if newChild.nodeType not in self._child_node_types: - raise 
xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(newChild), repr(self))) - if newChild is oldChild: - return - if newChild.parentNode is not None: - newChild.parentNode.removeChild(newChild) - try: - index = self.childNodes.index(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - self.childNodes[index] = newChild - newChild.parentNode = self - oldChild.parentNode = None - if (newChild.nodeType in _nodeTypes_with_children - or oldChild.nodeType in _nodeTypes_with_children): - _clear_id_cache(self) - newChild.nextSibling = oldChild.nextSibling - newChild.previousSibling = oldChild.previousSibling - oldChild.nextSibling = None - oldChild.previousSibling = None - if newChild.previousSibling: - newChild.previousSibling.nextSibling = newChild - if newChild.nextSibling: - newChild.nextSibling.previousSibling = newChild - return oldChild - - def removeChild(self, oldChild): - try: - self.childNodes.remove(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - if oldChild.nextSibling is not None: - oldChild.nextSibling.previousSibling = oldChild.previousSibling - if oldChild.previousSibling is not None: - oldChild.previousSibling.nextSibling = oldChild.nextSibling - oldChild.nextSibling = oldChild.previousSibling = None - if oldChild.nodeType in _nodeTypes_with_children: - _clear_id_cache(self) - - oldChild.parentNode = None - return oldChild - - def normalize(self): - L = [] - for child in self.childNodes: - if child.nodeType == Node.TEXT_NODE: - if not child.data: - # empty text node; discard - if L: - L[-1].nextSibling = child.nextSibling - if child.nextSibling: - child.nextSibling.previousSibling = child.previousSibling - child.unlink() - elif L and L[-1].nodeType == child.nodeType: - # collapse text node - node = L[-1] - node.data = node.data + child.data - node.nextSibling = child.nextSibling - if child.nextSibling: - child.nextSibling.previousSibling = node - child.unlink() - else: - L.append(child) - else: - L.append(child) - if 
child.nodeType == Node.ELEMENT_NODE: - child.normalize() - self.childNodes[:] = L - - def cloneNode(self, deep): - return _clone_node(self, deep, self.ownerDocument or self) - - def isSupported(self, feature, version): - return self.ownerDocument.implementation.hasFeature(feature, version) - - def _get_localName(self): - # Overridden in Element and Attr where localName can be Non-Null - return None - - # Node interfaces from Level 3 (WD 9 April 2002) - - def isSameNode(self, other): - return self is other - - def getInterface(self, feature): - if self.isSupported(feature, None): - return self - else: - return None - - # The "user data" functions use a dictionary that is only present - # if some user data has been set, so be careful not to assume it - # exists. - - def getUserData(self, key): - try: - return self._user_data[key][0] - except (AttributeError, KeyError): - return None - - def setUserData(self, key, data, handler): - old = None - try: - d = self._user_data - except AttributeError: - d = {} - self._user_data = d - if key in d: - old = d[key][0] - if data is None: - # ignore handlers passed for None - handler = None - if old is not None: - del d[key] - else: - d[key] = (data, handler) - return old - - def _call_user_data_handler(self, operation, src, dst): - if hasattr(self, "_user_data"): - for key, (data, handler) in self._user_data.items(): - if handler is not None: - handler.handle(operation, key, data, src, dst) - - # minidom-specific API: - - def unlink(self): - self.parentNode = self.ownerDocument = None - if self.childNodes: - for child in self.childNodes: - child.unlink() - self.childNodes = NodeList() - self.previousSibling = None - self.nextSibling = None - -defproperty(Node, "firstChild", doc="First child node, or None.") -defproperty(Node, "lastChild", doc="Last child node, or None.") -defproperty(Node, "localName", doc="Namespace-local name of this node.") - - -def _append_child(self, node): - # fast path with less checks; usable by DOM 
builders if careful - childNodes = self.childNodes - if childNodes: - last = childNodes[-1] - node.__dict__["previousSibling"] = last - last.__dict__["nextSibling"] = node - childNodes.append(node) - node.__dict__["parentNode"] = self - -def _in_document(node): - # return True iff node is part of a document tree - while node is not None: - if node.nodeType == Node.DOCUMENT_NODE: - return True - node = node.parentNode - return False - -def _write_data(writer, data): - "Writes datachars to writer." - if data: - data = data.replace("&", "&").replace("<", "<"). \ - replace("\"", """).replace(">", ">") - writer.write(data) - -def _get_elements_by_tagName_helper(parent, name, rc): - for node in parent.childNodes: - if node.nodeType == Node.ELEMENT_NODE and \ - (name == "*" or node.tagName == name): - rc.append(node) - _get_elements_by_tagName_helper(node, name, rc) - return rc - -def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): - for node in parent.childNodes: - if node.nodeType == Node.ELEMENT_NODE: - if ((localName == "*" or node.localName == localName) and - (nsURI == "*" or node.namespaceURI == nsURI)): - rc.append(node) - _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) - return rc - -class DocumentFragment(Node): - nodeType = Node.DOCUMENT_FRAGMENT_NODE - nodeName = "#document-fragment" - nodeValue = None - attributes = None - parentNode = None - _child_node_types = (Node.ELEMENT_NODE, - Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, - Node.ENTITY_REFERENCE_NODE, - Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, - Node.NOTATION_NODE) - - def __init__(self): - self.childNodes = NodeList() - - -class Attr(Node): - nodeType = Node.ATTRIBUTE_NODE - attributes = None - ownerElement = None - specified = False - _is_id = False - - _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) - - def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, - prefix=None): - # skip setattr for performance - d = 
self.__dict__ - d["nodeName"] = d["name"] = qName - d["namespaceURI"] = namespaceURI - d["prefix"] = prefix - d['childNodes'] = NodeList() - - # Add the single child node that represents the value of the attr - self.childNodes.append(Text()) - - # nodeValue and value are set elsewhere - - def _get_localName(self): - return self.nodeName.split(":", 1)[-1] - - def _get_specified(self): - return self.specified - - def __setattr__(self, name, value): - d = self.__dict__ - if name in ("value", "nodeValue"): - d["value"] = d["nodeValue"] = value - d2 = self.childNodes[0].__dict__ - d2["data"] = d2["nodeValue"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - elif name in ("name", "nodeName"): - d["name"] = d["nodeName"] = value - if self.ownerElement is not None: - _clear_id_cache(self.ownerElement) - else: - d[name] = value - - def _set_prefix(self, prefix): - nsuri = self.namespaceURI - if prefix == "xmlns": - if nsuri and nsuri != XMLNS_NAMESPACE: - raise xml.dom.NamespaceErr( - "illegal use of 'xmlns' prefix for the wrong namespace") - d = self.__dict__ - d['prefix'] = prefix - if prefix is None: - newName = self.localName - else: - newName = "%s:%s" % (prefix, self.localName) - if self.ownerElement: - _clear_id_cache(self.ownerElement) - d['nodeName'] = d['name'] = newName - - def _set_value(self, value): - d = self.__dict__ - d['value'] = d['nodeValue'] = value - if self.ownerElement: - _clear_id_cache(self.ownerElement) - self.childNodes[0].data = value - - def unlink(self): - # This implementation does not call the base implementation - # since most of that is not needed, and the expense of the - # method call is not warranted. We duplicate the removal of - # children, but that's all we needed from the base class. 
- elem = self.ownerElement - if elem is not None: - del elem._attrs[self.nodeName] - del elem._attrsNS[(self.namespaceURI, self.localName)] - if self._is_id: - self._is_id = False - elem._magic_id_nodes -= 1 - self.ownerDocument._magic_id_count -= 1 - for child in self.childNodes: - child.unlink() - del self.childNodes[:] - - def _get_isId(self): - if self._is_id: - return True - doc = self.ownerDocument - elem = self.ownerElement - if doc is None or elem is None: - return False - - info = doc._get_elem_info(elem) - if info is None: - return False - if self.namespaceURI: - return info.isIdNS(self.namespaceURI, self.localName) - else: - return info.isId(self.nodeName) - - def _get_schemaType(self): - doc = self.ownerDocument - elem = self.ownerElement - if doc is None or elem is None: - return _no_type - - info = doc._get_elem_info(elem) - if info is None: - return _no_type - if self.namespaceURI: - return info.getAttributeTypeNS(self.namespaceURI, self.localName) - else: - return info.getAttributeType(self.nodeName) - -defproperty(Attr, "isId", doc="True if this attribute is an ID.") -defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") -defproperty(Attr, "schemaType", doc="Schema type for this attribute.") - - -class NamedNodeMap(object): - """The attribute list is a transient interface to the underlying - dictionaries. Mutations here will change the underlying element's - dictionary. - - Ordering is imposed artificially and does not reflect the order of - attributes as found in an input document. 
- """ - - __slots__ = ('_attrs', '_attrsNS', '_ownerElement') - - def __init__(self, attrs, attrsNS, ownerElement): - self._attrs = attrs - self._attrsNS = attrsNS - self._ownerElement = ownerElement - - def _get_length(self): - return len(self._attrs) - - def item(self, index): - try: - return self[self._attrs.keys()[index]] - except IndexError: - return None - - def items(self): - L = [] - for node in self._attrs.values(): - L.append((node.nodeName, node.value)) - return L - - def itemsNS(self): - L = [] - for node in self._attrs.values(): - L.append(((node.namespaceURI, node.localName), node.value)) - return L - - def has_key(self, key): - if isinstance(key, StringTypes): - return key in self._attrs - else: - return key in self._attrsNS - - def keys(self): - return self._attrs.keys() - - def keysNS(self): - return self._attrsNS.keys() - - def values(self): - return self._attrs.values() - - def get(self, name, value=None): - return self._attrs.get(name, value) - - __len__ = _get_length - - __hash__ = None # Mutable type can't be correctly hashed - def __cmp__(self, other): - if self._attrs is getattr(other, "_attrs", None): - return 0 - else: - return cmp(id(self), id(other)) - - def __getitem__(self, attname_or_tuple): - if isinstance(attname_or_tuple, tuple): - return self._attrsNS[attname_or_tuple] - else: - return self._attrs[attname_or_tuple] - - # same as set - def __setitem__(self, attname, value): - if isinstance(value, StringTypes): - try: - node = self._attrs[attname] - except KeyError: - node = Attr(attname) - node.ownerDocument = self._ownerElement.ownerDocument - self.setNamedItem(node) - node.value = value - else: - if not isinstance(value, Attr): - raise TypeError, "value must be a string or Attr object" - node = value - self.setNamedItem(node) - - def getNamedItem(self, name): - try: - return self._attrs[name] - except KeyError: - return None - - def getNamedItemNS(self, namespaceURI, localName): - try: - return self._attrsNS[(namespaceURI, 
localName)] - except KeyError: - return None - - def removeNamedItem(self, name): - n = self.getNamedItem(name) - if n is not None: - _clear_id_cache(self._ownerElement) - del self._attrs[n.nodeName] - del self._attrsNS[(n.namespaceURI, n.localName)] - if 'ownerElement' in n.__dict__: - n.__dict__['ownerElement'] = None - return n - else: - raise xml.dom.NotFoundErr() - - def removeNamedItemNS(self, namespaceURI, localName): - n = self.getNamedItemNS(namespaceURI, localName) - if n is not None: - _clear_id_cache(self._ownerElement) - del self._attrsNS[(n.namespaceURI, n.localName)] - del self._attrs[n.nodeName] - if 'ownerElement' in n.__dict__: - n.__dict__['ownerElement'] = None - return n - else: - raise xml.dom.NotFoundErr() - - def setNamedItem(self, node): - if not isinstance(node, Attr): - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - old = self._attrs.get(node.name) - if old: - old.unlink() - self._attrs[node.name] = node - self._attrsNS[(node.namespaceURI, node.localName)] = node - node.ownerElement = self._ownerElement - _clear_id_cache(node.ownerElement) - return old - - def setNamedItemNS(self, node): - return self.setNamedItem(node) - - def __delitem__(self, attname_or_tuple): - node = self[attname_or_tuple] - _clear_id_cache(node.ownerElement) - node.unlink() - - def __getstate__(self): - return self._attrs, self._attrsNS, self._ownerElement - - def __setstate__(self, state): - self._attrs, self._attrsNS, self._ownerElement = state - -defproperty(NamedNodeMap, "length", - doc="Number of nodes in the NamedNodeMap.") - -AttributeList = NamedNodeMap - - -class TypeInfo(object): - __slots__ = 'namespace', 'name' - - def __init__(self, namespace, name): - self.namespace = namespace - self.name = name - - def __repr__(self): - if self.namespace: - return "" % (self.name, self.namespace) - else: - return "" % self.name - - def _get_name(self): - return self.name - - def _get_namespace(self): - return 
self.namespace - -_no_type = TypeInfo(None, None) - -class Element(Node): - nodeType = Node.ELEMENT_NODE - nodeValue = None - schemaType = _no_type - - _magic_id_nodes = 0 - - _child_node_types = (Node.ELEMENT_NODE, - Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, - Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, - Node.ENTITY_REFERENCE_NODE) - - def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, - localName=None): - self.tagName = self.nodeName = tagName - self.prefix = prefix - self.namespaceURI = namespaceURI - self.childNodes = NodeList() - - self._attrs = {} # attributes are double-indexed: - self._attrsNS = {} # tagName -> Attribute - # URI,localName -> Attribute - # in the future: consider lazy generation - # of attribute objects this is too tricky - # for now because of headaches with - # namespaces. - - def _get_localName(self): - return self.tagName.split(":", 1)[-1] - - def _get_tagName(self): - return self.tagName - - def unlink(self): - for attr in self._attrs.values(): - attr.unlink() - self._attrs = None - self._attrsNS = None - Node.unlink(self) - - def getAttribute(self, attname): - try: - return self._attrs[attname].value - except KeyError: - return "" - - def getAttributeNS(self, namespaceURI, localName): - try: - return self._attrsNS[(namespaceURI, localName)].value - except KeyError: - return "" - - def setAttribute(self, attname, value): - attr = self.getAttributeNode(attname) - if attr is None: - attr = Attr(attname) - # for performance - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) - elif value != attr.value: - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - if attr.isId: - _clear_id_cache(self) - - def setAttributeNS(self, namespaceURI, qualifiedName, value): - prefix, localname = _nssplit(qualifiedName) - attr = self.getAttributeNodeNS(namespaceURI, localname) - if attr is None: - # for performance - attr = 
Attr(qualifiedName, namespaceURI, localname, prefix) - d = attr.__dict__ - d["prefix"] = prefix - d["nodeName"] = qualifiedName - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) - else: - d = attr.__dict__ - if value != attr.value: - d["value"] = d["nodeValue"] = value - if attr.isId: - _clear_id_cache(self) - if attr.prefix != prefix: - d["prefix"] = prefix - d["nodeName"] = qualifiedName - - def getAttributeNode(self, attrname): - return self._attrs.get(attrname) - - def getAttributeNodeNS(self, namespaceURI, localName): - return self._attrsNS.get((namespaceURI, localName)) - - def setAttributeNode(self, attr): - if attr.ownerElement not in (None, self): - raise xml.dom.InuseAttributeErr("attribute node already owned") - old1 = self._attrs.get(attr.name, None) - if old1 is not None: - self.removeAttributeNode(old1) - old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) - if old2 is not None and old2 is not old1: - self.removeAttributeNode(old2) - _set_attribute_node(self, attr) - - if old1 is not attr: - # It might have already been part of this node, in which case - # it doesn't represent a change, and should not be returned. 
- return old1 - if old2 is not attr: - return old2 - - setAttributeNodeNS = setAttributeNode - - def removeAttribute(self, name): - try: - attr = self._attrs[name] - except KeyError: - raise xml.dom.NotFoundErr() - self.removeAttributeNode(attr) - - def removeAttributeNS(self, namespaceURI, localName): - try: - attr = self._attrsNS[(namespaceURI, localName)] - except KeyError: - raise xml.dom.NotFoundErr() - self.removeAttributeNode(attr) - - def removeAttributeNode(self, node): - if node is None: - raise xml.dom.NotFoundErr() - try: - self._attrs[node.name] - except KeyError: - raise xml.dom.NotFoundErr() - _clear_id_cache(self) - node.unlink() - # Restore this since the node is still useful and otherwise - # unlinked - node.ownerDocument = self.ownerDocument - - removeAttributeNodeNS = removeAttributeNode - - def hasAttribute(self, name): - return name in self._attrs - - def hasAttributeNS(self, namespaceURI, localName): - return (namespaceURI, localName) in self._attrsNS - - def getElementsByTagName(self, name): - return _get_elements_by_tagName_helper(self, name, NodeList()) - - def getElementsByTagNameNS(self, namespaceURI, localName): - return _get_elements_by_tagName_ns_helper( - self, namespaceURI, localName, NodeList()) - - def __repr__(self): - return "" % (self.tagName, id(self)) - - def writexml(self, writer, indent="", addindent="", newl=""): - # indent = current indentation - # addindent = indentation to add to higher levels - # newl = newline string - writer.write(indent+"<" + self.tagName) - - attrs = self._get_attributes() - a_names = attrs.keys() - a_names.sort() - - for a_name in a_names: - writer.write(" %s=\"" % a_name) - _write_data(writer, attrs[a_name].value) - writer.write("\"") - if self.childNodes: - writer.write(">") - if (len(self.childNodes) == 1 and - self.childNodes[0].nodeType == Node.TEXT_NODE): - self.childNodes[0].writexml(writer, '', '', '') - else: - writer.write(newl) - for node in self.childNodes: - node.writexml(writer, 
indent+addindent, addindent, newl) - writer.write(indent) - writer.write("%s" % (self.tagName, newl)) - else: - writer.write("/>%s"%(newl)) - - def _get_attributes(self): - return NamedNodeMap(self._attrs, self._attrsNS, self) - - def hasAttributes(self): - if self._attrs: - return True - else: - return False - - # DOM Level 3 attributes, based on the 22 Oct 2002 draft - - def setIdAttribute(self, name): - idAttr = self.getAttributeNode(name) - self.setIdAttributeNode(idAttr) - - def setIdAttributeNS(self, namespaceURI, localName): - idAttr = self.getAttributeNodeNS(namespaceURI, localName) - self.setIdAttributeNode(idAttr) - - def setIdAttributeNode(self, idAttr): - if idAttr is None or not self.isSameNode(idAttr.ownerElement): - raise xml.dom.NotFoundErr() - if _get_containing_entref(self) is not None: - raise xml.dom.NoModificationAllowedErr() - if not idAttr._is_id: - idAttr.__dict__['_is_id'] = True - self._magic_id_nodes += 1 - self.ownerDocument._magic_id_count += 1 - _clear_id_cache(self) - -defproperty(Element, "attributes", - doc="NamedNodeMap of attributes on the element.") -defproperty(Element, "localName", - doc="Namespace-local name of this element.") - - -def _set_attribute_node(element, attr): - _clear_id_cache(element) - element._attrs[attr.name] = attr - element._attrsNS[(attr.namespaceURI, attr.localName)] = attr - - # This creates a circular reference, but Element.unlink() - # breaks the cycle since the references to the attribute - # dictionaries are tossed. - attr.__dict__['ownerElement'] = element - - -class Childless: - """Mixin that makes childless-ness easy to implement and avoids - the complexity of the Node methods that deal with children. 
- """ - - attributes = None - childNodes = EmptyNodeList() - firstChild = None - lastChild = None - - def _get_firstChild(self): - return None - - def _get_lastChild(self): - return None - - def appendChild(self, node): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes cannot have children") - - def hasChildNodes(self): - return False - - def insertBefore(self, newChild, refChild): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes do not have children") - - def removeChild(self, oldChild): - raise xml.dom.NotFoundErr( - self.nodeName + " nodes do not have children") - - def normalize(self): - # For childless nodes, normalize() has nothing to do. - pass - - def replaceChild(self, newChild, oldChild): - raise xml.dom.HierarchyRequestErr( - self.nodeName + " nodes do not have children") - - -class ProcessingInstruction(Childless, Node): - nodeType = Node.PROCESSING_INSTRUCTION_NODE - - def __init__(self, target, data): - self.target = self.nodeName = target - self.data = self.nodeValue = data - - def _get_data(self): - return self.data - def _set_data(self, value): - d = self.__dict__ - d['data'] = d['nodeValue'] = value - - def _get_target(self): - return self.target - def _set_target(self, value): - d = self.__dict__ - d['target'] = d['nodeName'] = value - - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - elif name == "target" or name == "nodeName": - self.__dict__['target'] = self.__dict__['nodeName'] = value - else: - self.__dict__[name] = value - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write("%s%s" % (indent,self.target, self.data, newl)) - - -class CharacterData(Childless, Node): - def _get_length(self): - return len(self.data) - __len__ = _get_length - - def _get_data(self): - return self.__dict__['data'] - def _set_data(self, data): - d = self.__dict__ - d['data'] = d['nodeValue'] = data - - _get_nodeValue = 
_get_data - _set_nodeValue = _set_data - - def __setattr__(self, name, value): - if name == "data" or name == "nodeValue": - self.__dict__['data'] = self.__dict__['nodeValue'] = value - else: - self.__dict__[name] = value - - def __repr__(self): - data = self.data - if len(data) > 10: - dotdotdot = "..." - else: - dotdotdot = "" - return '' % ( - self.__class__.__name__, data[0:10], dotdotdot) - - def substringData(self, offset, count): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - return self.data[offset:offset+count] - - def appendData(self, arg): - self.data = self.data + arg - - def insertData(self, offset, arg): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if arg: - self.data = "%s%s%s" % ( - self.data[:offset], arg, self.data[offset:]) - - def deleteData(self, offset, count): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - if count: - self.data = self.data[:offset] + self.data[offset+count:] - - def replaceData(self, offset, count, arg): - if offset < 0: - raise xml.dom.IndexSizeErr("offset cannot be negative") - if offset >= len(self.data): - raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") - if count < 0: - raise xml.dom.IndexSizeErr("count cannot be negative") - if count: - self.data = "%s%s%s" % ( - self.data[:offset], arg, self.data[offset+count:]) - -defproperty(CharacterData, "length", doc="Length of the string data.") - - -class Text(CharacterData): - # Make sure we don't add an instance __dict__ if we don't 
already - # have one, at least when that's possible: - # XXX this does not work, CharacterData is an old-style class - # __slots__ = () - - nodeType = Node.TEXT_NODE - nodeName = "#text" - attributes = None - - def splitText(self, offset): - if offset < 0 or offset > len(self.data): - raise xml.dom.IndexSizeErr("illegal offset value") - newText = self.__class__() - newText.data = self.data[offset:] - newText.ownerDocument = self.ownerDocument - next = self.nextSibling - if self.parentNode and self in self.parentNode.childNodes: - if next is None: - self.parentNode.appendChild(newText) - else: - self.parentNode.insertBefore(newText, next) - self.data = self.data[:offset] - return newText - - def writexml(self, writer, indent="", addindent="", newl=""): - _write_data(writer, "%s%s%s" % (indent, self.data, newl)) - - # DOM Level 3 (WD 9 April 2002) - - def _get_wholeText(self): - L = [self.data] - n = self.previousSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - L.insert(0, n.data) - n = n.previousSibling - else: - break - n = self.nextSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - L.append(n.data) - n = n.nextSibling - else: - break - return ''.join(L) - - def replaceWholeText(self, content): - # XXX This needs to be seriously changed if minidom ever - # supports EntityReference nodes. 
- parent = self.parentNode - n = self.previousSibling - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - next = n.previousSibling - parent.removeChild(n) - n = next - else: - break - n = self.nextSibling - if not content: - parent.removeChild(self) - while n is not None: - if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - next = n.nextSibling - parent.removeChild(n) - n = next - else: - break - if content: - d = self.__dict__ - d['data'] = content - d['nodeValue'] = content - return self - else: - return None - - def _get_isWhitespaceInElementContent(self): - if self.data.strip(): - return False - elem = _get_containing_element(self) - if elem is None: - return False - info = self.ownerDocument._get_elem_info(elem) - if info is None: - return False - else: - return info.isElementContent() - -defproperty(Text, "isWhitespaceInElementContent", - doc="True iff this text node contains only whitespace" - " and is in element content.") -defproperty(Text, "wholeText", - doc="The text of all logically-adjacent text nodes.") - - -def _get_containing_element(node): - c = node.parentNode - while c is not None: - if c.nodeType == Node.ELEMENT_NODE: - return c - c = c.parentNode - return None - -def _get_containing_entref(node): - c = node.parentNode - while c is not None: - if c.nodeType == Node.ENTITY_REFERENCE_NODE: - return c - c = c.parentNode - return None - - -class Comment(Childless, CharacterData): - nodeType = Node.COMMENT_NODE - nodeName = "#comment" - - def __init__(self, data): - self.data = self.nodeValue = data - - def writexml(self, writer, indent="", addindent="", newl=""): - if "--" in self.data: - raise ValueError("'--' is not allowed in a comment node") - writer.write("%s%s" % (indent, self.data, newl)) - - -class CDATASection(Text): - # Make sure we don't add an instance __dict__ if we don't already - # have one, at least when that's possible: - # XXX this does not work, Text is an old-style class - # __slots__ 
= () - - nodeType = Node.CDATA_SECTION_NODE - nodeName = "#cdata-section" - - def writexml(self, writer, indent="", addindent="", newl=""): - if self.data.find("]]>") >= 0: - raise ValueError("']]>' not allowed in a CDATA section") - writer.write("" % self.data) - - -class ReadOnlySequentialNamedNodeMap(object): - __slots__ = '_seq', - - def __init__(self, seq=()): - # seq should be a list or tuple - self._seq = seq - - def __len__(self): - return len(self._seq) - - def _get_length(self): - return len(self._seq) - - def getNamedItem(self, name): - for n in self._seq: - if n.nodeName == name: - return n - - def getNamedItemNS(self, namespaceURI, localName): - for n in self._seq: - if n.namespaceURI == namespaceURI and n.localName == localName: - return n - - def __getitem__(self, name_or_tuple): - if isinstance(name_or_tuple, tuple): - node = self.getNamedItemNS(*name_or_tuple) - else: - node = self.getNamedItem(name_or_tuple) - if node is None: - raise KeyError, name_or_tuple - return node - - def item(self, index): - if index < 0: - return None - try: - return self._seq[index] - except IndexError: - return None - - def removeNamedItem(self, name): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def removeNamedItemNS(self, namespaceURI, localName): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def setNamedItem(self, node): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def setNamedItemNS(self, node): - raise xml.dom.NoModificationAllowedErr( - "NamedNodeMap instance is read-only") - - def __getstate__(self): - return [self._seq] - - def __setstate__(self, state): - self._seq = state[0] - -defproperty(ReadOnlySequentialNamedNodeMap, "length", - doc="Number of entries in the NamedNodeMap.") - - -class Identified: - """Mix-in class that supports the publicId and systemId attributes.""" - - # XXX this does not work, this is an old-style class - # 
__slots__ = 'publicId', 'systemId' - - def _identified_mixin_init(self, publicId, systemId): - self.publicId = publicId - self.systemId = systemId - - def _get_publicId(self): - return self.publicId - - def _get_systemId(self): - return self.systemId - -class DocumentType(Identified, Childless, Node): - nodeType = Node.DOCUMENT_TYPE_NODE - nodeValue = None - name = None - publicId = None - systemId = None - internalSubset = None - - def __init__(self, qualifiedName): - self.entities = ReadOnlySequentialNamedNodeMap() - self.notations = ReadOnlySequentialNamedNodeMap() - if qualifiedName: - prefix, localname = _nssplit(qualifiedName) - self.name = localname - self.nodeName = self.name - - def _get_internalSubset(self): - return self.internalSubset - - def cloneNode(self, deep): - if self.ownerDocument is None: - # it's ok - clone = DocumentType(None) - clone.name = self.name - clone.nodeName = self.name - operation = xml.dom.UserDataHandler.NODE_CLONED - if deep: - clone.entities._seq = [] - clone.notations._seq = [] - for n in self.notations._seq: - notation = Notation(n.nodeName, n.publicId, n.systemId) - clone.notations._seq.append(notation) - n._call_user_data_handler(operation, n, notation) - for e in self.entities._seq: - entity = Entity(e.nodeName, e.publicId, e.systemId, - e.notationName) - entity.actualEncoding = e.actualEncoding - entity.encoding = e.encoding - entity.version = e.version - clone.entities._seq.append(entity) - e._call_user_data_handler(operation, n, entity) - self._call_user_data_handler(operation, self, clone) - return clone - else: - return None - - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write(""+newl) - -class Entity(Identified, Node): - attributes = None - nodeType = Node.ENTITY_NODE - nodeValue = None - - actualEncoding = None - encoding = None - version = None - - def __init__(self, name, publicId, systemId, notation): - self.nodeName = name - self.notationName = notation - self.childNodes = NodeList() 
- self._identified_mixin_init(publicId, systemId) - - def _get_actualEncoding(self): - return self.actualEncoding - - def _get_encoding(self): - return self.encoding - - def _get_version(self): - return self.version - - def appendChild(self, newChild): - raise xml.dom.HierarchyRequestErr( - "cannot append children to an entity node") - - def insertBefore(self, newChild, refChild): - raise xml.dom.HierarchyRequestErr( - "cannot insert children below an entity node") - - def removeChild(self, oldChild): - raise xml.dom.HierarchyRequestErr( - "cannot remove children from an entity node") - - def replaceChild(self, newChild, oldChild): - raise xml.dom.HierarchyRequestErr( - "cannot replace children of an entity node") - -class Notation(Identified, Childless, Node): - nodeType = Node.NOTATION_NODE - nodeValue = None - - def __init__(self, name, publicId, systemId): - self.nodeName = name - self._identified_mixin_init(publicId, systemId) - - -class DOMImplementation(DOMImplementationLS): - _features = [("core", "1.0"), - ("core", "2.0"), - ("core", None), - ("xml", "1.0"), - ("xml", "2.0"), - ("xml", None), - ("ls-load", "3.0"), - ("ls-load", None), - ] - - def hasFeature(self, feature, version): - if version == "": - version = None - return (feature.lower(), version) in self._features - - def createDocument(self, namespaceURI, qualifiedName, doctype): - if doctype and doctype.parentNode is not None: - raise xml.dom.WrongDocumentErr( - "doctype object owned by another DOM tree") - doc = self._create_document() - - add_root_element = not (namespaceURI is None - and qualifiedName is None - and doctype is None) - - if not qualifiedName and add_root_element: - # The spec is unclear what to raise here; SyntaxErr - # would be the other obvious candidate. Since Xerces raises - # InvalidCharacterErr, and since SyntaxErr is not listed - # for createDocument, that seems to be the better choice. - # XXX: need to check for illegal characters here and in - # createElement. 
- - # DOM Level III clears this up when talking about the return value - # of this function. If namespaceURI, qName and DocType are - # Null the document is returned without a document element - # Otherwise if doctype or namespaceURI are not None - # Then we go back to the above problem - raise xml.dom.InvalidCharacterErr("Element with no name") - - if add_root_element: - prefix, localname = _nssplit(qualifiedName) - if prefix == "xml" \ - and namespaceURI != "http://www.w3.org/XML/1998/namespace": - raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") - if prefix and not namespaceURI: - raise xml.dom.NamespaceErr( - "illegal use of prefix without namespaces") - element = doc.createElementNS(namespaceURI, qualifiedName) - if doctype: - doc.appendChild(doctype) - doc.appendChild(element) - - if doctype: - doctype.parentNode = doctype.ownerDocument = doc - - doc.doctype = doctype - doc.implementation = self - return doc - - def createDocumentType(self, qualifiedName, publicId, systemId): - doctype = DocumentType(qualifiedName) - doctype.publicId = publicId - doctype.systemId = systemId - return doctype - - # DOM Level 3 (WD 9 April 2002) - - def getInterface(self, feature): - if self.hasFeature(feature, None): - return self - else: - return None - - # internal - def _create_document(self): - return Document() - -class ElementInfo(object): - """Object that represents content-model information for an element. - - This implementation is not expected to be used in practice; DOM - builders should provide implementations which do the right thing - using information available to it. 
- - """ - - __slots__ = 'tagName', - - def __init__(self, name): - self.tagName = name - - def getAttributeType(self, aname): - return _no_type - - def getAttributeTypeNS(self, namespaceURI, localName): - return _no_type - - def isElementContent(self): - return False - - def isEmpty(self): - """Returns true iff this element is declared to have an EMPTY - content model.""" - return False - - def isId(self, aname): - """Returns true iff the named attribute is a DTD-style ID.""" - return False - - def isIdNS(self, namespaceURI, localName): - """Returns true iff the identified attribute is a DTD-style ID.""" - return False - - def __getstate__(self): - return self.tagName - - def __setstate__(self, state): - self.tagName = state - -def _clear_id_cache(node): - if node.nodeType == Node.DOCUMENT_NODE: - node._id_cache.clear() - node._id_search_stack = None - elif _in_document(node): - node.ownerDocument._id_cache.clear() - node.ownerDocument._id_search_stack= None - -class Document(Node, DocumentLS): - _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) - - nodeType = Node.DOCUMENT_NODE - nodeName = "#document" - nodeValue = None - attributes = None - doctype = None - parentNode = None - previousSibling = nextSibling = None - - implementation = DOMImplementation() - - # Document attributes from Level 3 (WD 9 April 2002) - - actualEncoding = None - encoding = None - standalone = None - version = None - strictErrorChecking = False - errorHandler = None - documentURI = None - - _magic_id_count = 0 - - def __init__(self): - self.childNodes = NodeList() - # mapping of (namespaceURI, localName) -> ElementInfo - # and tagName -> ElementInfo - self._elem_info = {} - self._id_cache = {} - self._id_search_stack = None - - def _get_elem_info(self, element): - if element.namespaceURI: - key = element.namespaceURI, element.localName - else: - key = element.tagName - return self._elem_info.get(key) - - def 
_get_actualEncoding(self): - return self.actualEncoding - - def _get_doctype(self): - return self.doctype - - def _get_documentURI(self): - return self.documentURI - - def _get_encoding(self): - return self.encoding - - def _get_errorHandler(self): - return self.errorHandler - - def _get_standalone(self): - return self.standalone - - def _get_strictErrorChecking(self): - return self.strictErrorChecking - - def _get_version(self): - return self.version - - def appendChild(self, node): - if node.nodeType not in self._child_node_types: - raise xml.dom.HierarchyRequestErr( - "%s cannot be child of %s" % (repr(node), repr(self))) - if node.parentNode is not None: - # This needs to be done before the next test since this - # may *be* the document element, in which case it should - # end up re-ordered to the end. - node.parentNode.removeChild(node) - - if node.nodeType == Node.ELEMENT_NODE \ - and self._get_documentElement(): - raise xml.dom.HierarchyRequestErr( - "two document elements disallowed") - return Node.appendChild(self, node) - - def removeChild(self, oldChild): - try: - self.childNodes.remove(oldChild) - except ValueError: - raise xml.dom.NotFoundErr() - oldChild.nextSibling = oldChild.previousSibling = None - oldChild.parentNode = None - if self.documentElement is oldChild: - self.documentElement = None - - return oldChild - - def _get_documentElement(self): - for node in self.childNodes: - if node.nodeType == Node.ELEMENT_NODE: - return node - - def unlink(self): - if self.doctype is not None: - self.doctype.unlink() - self.doctype = None - Node.unlink(self) - - def cloneNode(self, deep): - if not deep: - return None - clone = self.implementation.createDocument(None, None, None) - clone.encoding = self.encoding - clone.standalone = self.standalone - clone.version = self.version - for n in self.childNodes: - childclone = _clone_node(n, deep, clone) - assert childclone.ownerDocument.isSameNode(clone) - clone.childNodes.append(childclone) - if 
childclone.nodeType == Node.DOCUMENT_NODE: - assert clone.documentElement is None - elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: - assert clone.doctype is None - clone.doctype = childclone - childclone.parentNode = clone - self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, - self, clone) - return clone - - def createDocumentFragment(self): - d = DocumentFragment() - d.ownerDocument = self - return d - - def createElement(self, tagName): - e = Element(tagName) - e.ownerDocument = self - return e - - def createTextNode(self, data): - if not isinstance(data, StringTypes): - raise TypeError, "node contents must be a string" - t = Text() - t.data = data - t.ownerDocument = self - return t - - def createCDATASection(self, data): - if not isinstance(data, StringTypes): - raise TypeError, "node contents must be a string" - c = CDATASection() - c.data = data - c.ownerDocument = self - return c - - def createComment(self, data): - c = Comment(data) - c.ownerDocument = self - return c - - def createProcessingInstruction(self, target, data): - p = ProcessingInstruction(target, data) - p.ownerDocument = self - return p - - def createAttribute(self, qName): - a = Attr(qName) - a.ownerDocument = self - a.value = "" - return a - - def createElementNS(self, namespaceURI, qualifiedName): - prefix, localName = _nssplit(qualifiedName) - e = Element(qualifiedName, namespaceURI, prefix) - e.ownerDocument = self - return e - - def createAttributeNS(self, namespaceURI, qualifiedName): - prefix, localName = _nssplit(qualifiedName) - a = Attr(qualifiedName, namespaceURI, localName, prefix) - a.ownerDocument = self - a.value = "" - return a - - # A couple of implementation-specific helpers to create node types - # not supported by the W3C DOM specs: - - def _create_entity(self, name, publicId, systemId, notationName): - e = Entity(name, publicId, systemId, notationName) - e.ownerDocument = self - return e - - def _create_notation(self, name, publicId, systemId): - n = 
Notation(name, publicId, systemId) - n.ownerDocument = self - return n - - def getElementById(self, id): - if id in self._id_cache: - return self._id_cache[id] - if not (self._elem_info or self._magic_id_count): - return None - - stack = self._id_search_stack - if stack is None: - # we never searched before, or the cache has been cleared - stack = [self.documentElement] - self._id_search_stack = stack - elif not stack: - # Previous search was completed and cache is still valid; - # no matching node. - return None - - result = None - while stack: - node = stack.pop() - # add child elements to stack for continued searching - stack.extend([child for child in node.childNodes - if child.nodeType in _nodeTypes_with_children]) - # check this node - info = self._get_elem_info(node) - if info: - # We have to process all ID attributes before - # returning in order to get all the attributes set to - # be IDs using Element.setIdAttribute*(). - for attr in node.attributes.values(): - if attr.namespaceURI: - if info.isIdNS(attr.namespaceURI, attr.localName): - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif not node._magic_id_nodes: - break - elif info.isId(attr.name): - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif not node._magic_id_nodes: - break - elif attr._is_id: - self._id_cache[attr.value] = node - if attr.value == id: - result = node - elif node._magic_id_nodes == 1: - break - elif node._magic_id_nodes: - for attr in node.attributes.values(): - if attr._is_id: - self._id_cache[attr.value] = node - if attr.value == id: - result = node - if result is not None: - break - return result - - def getElementsByTagName(self, name): - return _get_elements_by_tagName_helper(self, name, NodeList()) - - def getElementsByTagNameNS(self, namespaceURI, localName): - return _get_elements_by_tagName_ns_helper( - self, namespaceURI, localName, NodeList()) - - def isSupported(self, feature, version): - return 
self.implementation.hasFeature(feature, version) - - def importNode(self, node, deep): - if node.nodeType == Node.DOCUMENT_NODE: - raise xml.dom.NotSupportedErr("cannot import document nodes") - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - raise xml.dom.NotSupportedErr("cannot import document type nodes") - return _clone_node(node, deep, self) - - def writexml(self, writer, indent="", addindent="", newl="", - encoding = None): - if encoding is None: - writer.write(''+newl) - else: - writer.write('%s' % (encoding, newl)) - for node in self.childNodes: - node.writexml(writer, indent, addindent, newl) - - # DOM Level 3 (WD 9 April 2002) - - def renameNode(self, n, namespaceURI, name): - if n.ownerDocument is not self: - raise xml.dom.WrongDocumentErr( - "cannot rename nodes from other documents;\n" - "expected %s,\nfound %s" % (self, n.ownerDocument)) - if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): - raise xml.dom.NotSupportedErr( - "renameNode() only applies to element and attribute nodes") - if namespaceURI != EMPTY_NAMESPACE: - if ':' in name: - prefix, localName = name.split(':', 1) - if ( prefix == "xmlns" - and namespaceURI != xml.dom.XMLNS_NAMESPACE): - raise xml.dom.NamespaceErr( - "illegal use of 'xmlns' prefix") - else: - if ( name == "xmlns" - and namespaceURI != xml.dom.XMLNS_NAMESPACE - and n.nodeType == Node.ATTRIBUTE_NODE): - raise xml.dom.NamespaceErr( - "illegal use of the 'xmlns' attribute") - prefix = None - localName = name - else: - prefix = None - localName = None - if n.nodeType == Node.ATTRIBUTE_NODE: - element = n.ownerElement - if element is not None: - is_id = n._is_id - element.removeAttributeNode(n) - else: - element = None - # avoid __setattr__ - d = n.__dict__ - d['prefix'] = prefix - d['localName'] = localName - d['namespaceURI'] = namespaceURI - d['nodeName'] = name - if n.nodeType == Node.ELEMENT_NODE: - d['tagName'] = name - else: - # attribute node - d['name'] = name - if element is not None: - 
element.setAttributeNode(n) - if is_id: - element.setIdAttributeNode(n) - # It's not clear from a semantic perspective whether we should - # call the user data handlers for the NODE_RENAMED event since - # we're re-using the existing node. The draft spec has been - # interpreted as meaning "no, don't call the handler unless a - # new node is created." - return n - -defproperty(Document, "documentElement", - doc="Top-level element of this document.") - - -def _clone_node(node, deep, newOwnerDocument): - """ - Clone a node and give it the new owner document. - Called by Node.cloneNode and Document.importNode - """ - if node.ownerDocument.isSameNode(newOwnerDocument): - operation = xml.dom.UserDataHandler.NODE_CLONED - else: - operation = xml.dom.UserDataHandler.NODE_IMPORTED - if node.nodeType == Node.ELEMENT_NODE: - clone = newOwnerDocument.createElementNS(node.namespaceURI, - node.nodeName) - for attr in node.attributes.values(): - clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) - a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) - a.specified = attr.specified - - if deep: - for child in node.childNodes: - c = _clone_node(child, deep, newOwnerDocument) - clone.appendChild(c) - - elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: - clone = newOwnerDocument.createDocumentFragment() - if deep: - for child in node.childNodes: - c = _clone_node(child, deep, newOwnerDocument) - clone.appendChild(c) - - elif node.nodeType == Node.TEXT_NODE: - clone = newOwnerDocument.createTextNode(node.data) - elif node.nodeType == Node.CDATA_SECTION_NODE: - clone = newOwnerDocument.createCDATASection(node.data) - elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: - clone = newOwnerDocument.createProcessingInstruction(node.target, - node.data) - elif node.nodeType == Node.COMMENT_NODE: - clone = newOwnerDocument.createComment(node.data) - elif node.nodeType == Node.ATTRIBUTE_NODE: - clone = newOwnerDocument.createAttributeNS(node.namespaceURI, - 
node.nodeName) - clone.specified = True - clone.value = node.value - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - assert node.ownerDocument is not newOwnerDocument - operation = xml.dom.UserDataHandler.NODE_IMPORTED - clone = newOwnerDocument.implementation.createDocumentType( - node.name, node.publicId, node.systemId) - clone.ownerDocument = newOwnerDocument - if deep: - clone.entities._seq = [] - clone.notations._seq = [] - for n in node.notations._seq: - notation = Notation(n.nodeName, n.publicId, n.systemId) - notation.ownerDocument = newOwnerDocument - clone.notations._seq.append(notation) - if hasattr(n, '_call_user_data_handler'): - n._call_user_data_handler(operation, n, notation) - for e in node.entities._seq: - entity = Entity(e.nodeName, e.publicId, e.systemId, - e.notationName) - entity.actualEncoding = e.actualEncoding - entity.encoding = e.encoding - entity.version = e.version - entity.ownerDocument = newOwnerDocument - clone.entities._seq.append(entity) - if hasattr(e, '_call_user_data_handler'): - e._call_user_data_handler(operation, n, entity) - else: - # Note the cloning of Document and DocumentType nodes is - # implementation specific. minidom handles those cases - # directly in the cloneNode() methods. - raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) - - # Check for _call_user_data_handler() since this could conceivably - # used with other DOM implementations (one of the FourThought - # DOMs, perhaps?). 
- if hasattr(node, '_call_user_data_handler'): - node._call_user_data_handler(operation, node, clone) - return clone - - -def _nssplit(qualifiedName): - fields = qualifiedName.split(':', 1) - if len(fields) == 2: - return fields - else: - return (None, fields[0]) - - -def _get_StringIO(): - # we can't use cStringIO since it doesn't support Unicode strings - from StringIO import StringIO - return StringIO() - -def _do_pulldom_parse(func, args, kwargs): - events = func(*args, **kwargs) - toktype, rootNode = events.getEvent() - events.expandNode(rootNode) - events.clear() - return rootNode - -def parse(file, parser=None, bufsize=None): - """Parse a file into a DOM by filename or file object.""" - if parser is None and not bufsize: - from xml.dom import expatbuilder - return expatbuilder.parse(file) - else: - from xml.dom import pulldom - return _do_pulldom_parse(pulldom.parse, (file,), - {'parser': parser, 'bufsize': bufsize}) - -def parseString(string, parser=None): - """Parse a file into a DOM from a string.""" - if parser is None: - from xml.dom import expatbuilder - return expatbuilder.parseString(string) - else: - from xml.dom import pulldom - return _do_pulldom_parse(pulldom.parseString, (string,), - {'parser': parser}) - -def getDOMImplementation(features=None): - if features: - if isinstance(features, StringTypes): - features = domreg._parse_feature_string(features) - for f, v in features: - if not Document.implementation.hasFeature(f, v): - return None - return Document.implementation diff --git a/python/Lib/xml/dom/pulldom.py b/python/Lib/xml/dom/pulldom.py deleted file mode 100755 index 18f49b5019..0000000000 --- a/python/Lib/xml/dom/pulldom.py +++ /dev/null @@ -1,351 +0,0 @@ -import xml.sax -import xml.sax.handler -import types - -try: - _StringTypes = [types.StringType, types.UnicodeType] -except AttributeError: - _StringTypes = [types.StringType] - -START_ELEMENT = "START_ELEMENT" -END_ELEMENT = "END_ELEMENT" -COMMENT = "COMMENT" -START_DOCUMENT = 
"START_DOCUMENT" -END_DOCUMENT = "END_DOCUMENT" -PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" -IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" -CHARACTERS = "CHARACTERS" - -class PullDOM(xml.sax.ContentHandler): - _locator = None - document = None - - def __init__(self, documentFactory=None): - from xml.dom import XML_NAMESPACE - self.documentFactory = documentFactory - self.firstEvent = [None, None] - self.lastEvent = self.firstEvent - self.elementStack = [] - self.push = self.elementStack.append - try: - self.pop = self.elementStack.pop - except AttributeError: - # use class' pop instead - pass - self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self.pending_events = [] - - def pop(self): - result = self.elementStack[-1] - del self.elementStack[-1] - return result - - def setDocumentLocator(self, locator): - self._locator = locator - - def startPrefixMapping(self, prefix, uri): - if not hasattr(self, '_xmlns_attrs'): - self._xmlns_attrs = [] - self._xmlns_attrs.append((prefix or 'xmlns', uri)) - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix or None - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts.pop() - - def startElementNS(self, name, tagName , attrs): - # Retrieve xml namespace declaration attributes. - xmlns_uri = 'http://www.w3.org/2000/xmlns/' - xmlns_attrs = getattr(self, '_xmlns_attrs', None) - if xmlns_attrs is not None: - for aname, value in xmlns_attrs: - attrs._attrs[(xmlns_uri, aname)] = value - self._xmlns_attrs = [] - uri, localname = name - if uri: - # When using namespaces, the reader may or may not - # provide us with the original name. If not, create - # *a* valid tagName from the current context. 
- if tagName is None: - prefix = self._current_context[uri] - if prefix: - tagName = prefix + ":" + localname - else: - tagName = localname - if self.document: - node = self.document.createElementNS(uri, tagName) - else: - node = self.buildDocument(uri, tagName) - else: - # When the tagname is not prefixed, it just appears as - # localname - if self.document: - node = self.document.createElement(localname) - else: - node = self.buildDocument(None, localname) - - for aname,value in attrs.items(): - a_uri, a_localname = aname - if a_uri == xmlns_uri: - if a_localname == 'xmlns': - qname = a_localname - else: - qname = 'xmlns:' + a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - elif a_uri: - prefix = self._current_context[a_uri] - if prefix: - qname = prefix + ":" + a_localname - else: - qname = a_localname - attr = self.document.createAttributeNS(a_uri, qname) - node.setAttributeNodeNS(attr) - else: - attr = self.document.createAttribute(a_localname) - node.setAttributeNode(attr) - attr.value = value - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElementNS(self, name, tagName): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def startElement(self, name, attrs): - if self.document: - node = self.document.createElement(name) - else: - node = self.buildDocument(None, name) - - for aname,value in attrs.items(): - attr = self.document.createAttribute(aname) - attr.value = value - node.setAttributeNode(attr) - - self.lastEvent[1] = [(START_ELEMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - - def endElement(self, name): - self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] - self.lastEvent = self.lastEvent[1] - - def comment(self, s): - if self.document: - node = self.document.createComment(s) - self.lastEvent[1] = [(COMMENT, node), None] - self.lastEvent = 
self.lastEvent[1] - else: - event = [(COMMENT, s), None] - self.pending_events.append(event) - - def processingInstruction(self, target, data): - if self.document: - node = self.document.createProcessingInstruction(target, data) - self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] - self.lastEvent = self.lastEvent[1] - else: - event = [(PROCESSING_INSTRUCTION, target, data), None] - self.pending_events.append(event) - - def ignorableWhitespace(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] - self.lastEvent = self.lastEvent[1] - - def characters(self, chars): - node = self.document.createTextNode(chars) - self.lastEvent[1] = [(CHARACTERS, node), None] - self.lastEvent = self.lastEvent[1] - - def startDocument(self): - if self.documentFactory is None: - import xml.dom.minidom - self.documentFactory = xml.dom.minidom.Document.implementation - - def buildDocument(self, uri, tagname): - # Can't do that in startDocument, since we need the tagname - # XXX: obtain DocumentType - node = self.documentFactory.createDocument(uri, tagname, None) - self.document = node - self.lastEvent[1] = [(START_DOCUMENT, node), None] - self.lastEvent = self.lastEvent[1] - self.push(node) - # Put everything we have seen so far into the document - for e in self.pending_events: - if e[0][0] == PROCESSING_INSTRUCTION: - _,target,data = e[0] - n = self.document.createProcessingInstruction(target, data) - e[0] = (PROCESSING_INSTRUCTION, n) - elif e[0][0] == COMMENT: - n = self.document.createComment(e[0][1]) - e[0] = (COMMENT, n) - else: - raise AssertionError("Unknown pending event ",e[0][0]) - self.lastEvent[1] = e - self.lastEvent = e - self.pending_events = None - return node.firstChild - - def endDocument(self): - self.lastEvent[1] = [(END_DOCUMENT, self.document), None] - self.pop() - - def clear(self): - "clear(): Explicitly release parsing structures" - self.document = None - -class ErrorHandler: - def 
warning(self, exception): - print exception - def error(self, exception): - raise exception - def fatalError(self, exception): - raise exception - -class DOMEventStream: - def __init__(self, stream, parser, bufsize): - self.stream = stream - self.parser = parser - self.bufsize = bufsize - if not hasattr(self.parser, 'feed'): - self.getEvent = self._slurp - self.reset() - - def reset(self): - self.pulldom = PullDOM() - # This content handler relies on namespace support - self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) - self.parser.setContentHandler(self.pulldom) - - def __getitem__(self, pos): - rc = self.getEvent() - if rc: - return rc - raise IndexError - - def next(self): - rc = self.getEvent() - if rc: - return rc - raise StopIteration - - def __iter__(self): - return self - - def expandNode(self, node): - event = self.getEvent() - parents = [node] - while event: - token, cur_node = event - if cur_node is node: - return - if token != END_ELEMENT: - parents[-1].appendChild(cur_node) - if token == START_ELEMENT: - parents.append(cur_node) - elif token == END_ELEMENT: - del parents[-1] - event = self.getEvent() - - def getEvent(self): - # use IncrementalParser interface, so we get the desired - # pull effect - if not self.pulldom.firstEvent[1]: - self.pulldom.lastEvent = self.pulldom.firstEvent - while not self.pulldom.firstEvent[1]: - buf = self.stream.read(self.bufsize) - if not buf: - self.parser.close() - return None - self.parser.feed(buf) - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def _slurp(self): - """ Fallback replacement for getEvent() using the - standard SAX2 interface, which means we slurp the - SAX events into memory (no performance gain, but - we are compatible to all SAX parsers). 
- """ - self.parser.parse(self.stream) - self.getEvent = self._emit - return self._emit() - - def _emit(self): - """ Fallback replacement for getEvent() that emits - the events that _slurp() read previously. - """ - rc = self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] - return rc - - def clear(self): - """clear(): Explicitly release parsing objects""" - self.pulldom.clear() - del self.pulldom - self.parser = None - self.stream = None - -class SAX2DOM(PullDOM): - - def startElementNS(self, name, tagName , attrs): - PullDOM.startElementNS(self, name, tagName, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def startElement(self, name, attrs): - PullDOM.startElement(self, name, attrs) - curNode = self.elementStack[-1] - parentNode = self.elementStack[-2] - parentNode.appendChild(curNode) - - def processingInstruction(self, target, data): - PullDOM.processingInstruction(self, target, data) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def ignorableWhitespace(self, chars): - PullDOM.ignorableWhitespace(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - def characters(self, chars): - PullDOM.characters(self, chars) - node = self.lastEvent[0][1] - parentNode = self.elementStack[-1] - parentNode.appendChild(node) - - -default_bufsize = (2 ** 14) - 20 - -def parse(stream_or_string, parser=None, bufsize=None): - if bufsize is None: - bufsize = default_bufsize - if type(stream_or_string) in _StringTypes: - stream = open(stream_or_string) - else: - stream = stream_or_string - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(stream, parser, bufsize) - -def parseString(string, parser=None): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - - bufsize = len(string) - buf = 
StringIO(string) - if not parser: - parser = xml.sax.make_parser() - return DOMEventStream(buf, parser, bufsize) diff --git a/python/Lib/xml/dom/xmlbuilder.py b/python/Lib/xml/dom/xmlbuilder.py deleted file mode 100755 index dc7c5d4705..0000000000 --- a/python/Lib/xml/dom/xmlbuilder.py +++ /dev/null @@ -1,386 +0,0 @@ -"""Implementation of the DOM Level 3 'LS-Load' feature.""" - -import copy -import xml.dom - -from xml.dom.NodeFilter import NodeFilter - - -__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] - - -class Options: - """Features object that has variables set for each DOMBuilder feature. - - The DOMBuilder class uses an instance of this class to pass settings to - the ExpatBuilder class. - """ - - # Note that the DOMBuilder class in LoadSave constrains which of these - # values can be set using the DOM Level 3 LoadSave feature. - - namespaces = 1 - namespace_declarations = True - validation = False - external_parameter_entities = True - external_general_entities = True - external_dtd_subset = True - validate_if_schema = False - validate = False - datatype_normalization = False - create_entity_ref_nodes = True - entities = True - whitespace_in_element_content = True - cdata_sections = True - comments = True - charset_overrides_xml_encoding = True - infoset = False - supported_mediatypes_only = False - - errorHandler = None - filter = None - - -class DOMBuilder: - entityResolver = None - errorHandler = None - filter = None - - ACTION_REPLACE = 1 - ACTION_APPEND_AS_CHILDREN = 2 - ACTION_INSERT_AFTER = 3 - ACTION_INSERT_BEFORE = 4 - - _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, - ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) - - def __init__(self): - self._options = Options() - - def _get_entityResolver(self): - return self.entityResolver - def _set_entityResolver(self, entityResolver): - self.entityResolver = entityResolver - - def _get_errorHandler(self): - return self.errorHandler - def _set_errorHandler(self, errorHandler): - 
self.errorHandler = errorHandler - - def _get_filter(self): - return self.filter - def _set_filter(self, filter): - self.filter = filter - - def setFeature(self, name, state): - if self.supportsFeature(name): - state = state and 1 or 0 - try: - settings = self._settings[(_name_xform(name), state)] - except KeyError: - raise xml.dom.NotSupportedErr( - "unsupported feature: %r" % (name,)) - else: - for name, value in settings: - setattr(self._options, name, value) - else: - raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) - - def supportsFeature(self, name): - return hasattr(self._options, _name_xform(name)) - - def canSetFeature(self, name, state): - key = (_name_xform(name), state and 1 or 0) - return key in self._settings - - # This dictionary maps from (feature,value) to a list of - # (option,value) pairs that should be set on the Options object. - # If a (feature,value) setting is not in this dictionary, it is - # not supported by the DOMBuilder. - # - _settings = { - ("namespace_declarations", 0): [ - ("namespace_declarations", 0)], - ("namespace_declarations", 1): [ - ("namespace_declarations", 1)], - ("validation", 0): [ - ("validation", 0)], - ("external_general_entities", 0): [ - ("external_general_entities", 0)], - ("external_general_entities", 1): [ - ("external_general_entities", 1)], - ("external_parameter_entities", 0): [ - ("external_parameter_entities", 0)], - ("external_parameter_entities", 1): [ - ("external_parameter_entities", 1)], - ("validate_if_schema", 0): [ - ("validate_if_schema", 0)], - ("create_entity_ref_nodes", 0): [ - ("create_entity_ref_nodes", 0)], - ("create_entity_ref_nodes", 1): [ - ("create_entity_ref_nodes", 1)], - ("entities", 0): [ - ("create_entity_ref_nodes", 0), - ("entities", 0)], - ("entities", 1): [ - ("entities", 1)], - ("whitespace_in_element_content", 0): [ - ("whitespace_in_element_content", 0)], - ("whitespace_in_element_content", 1): [ - ("whitespace_in_element_content", 1)], - ("cdata_sections", 0): [ - 
("cdata_sections", 0)], - ("cdata_sections", 1): [ - ("cdata_sections", 1)], - ("comments", 0): [ - ("comments", 0)], - ("comments", 1): [ - ("comments", 1)], - ("charset_overrides_xml_encoding", 0): [ - ("charset_overrides_xml_encoding", 0)], - ("charset_overrides_xml_encoding", 1): [ - ("charset_overrides_xml_encoding", 1)], - ("infoset", 0): [], - ("infoset", 1): [ - ("namespace_declarations", 0), - ("validate_if_schema", 0), - ("create_entity_ref_nodes", 0), - ("entities", 0), - ("cdata_sections", 0), - ("datatype_normalization", 1), - ("whitespace_in_element_content", 1), - ("comments", 1), - ("charset_overrides_xml_encoding", 1)], - ("supported_mediatypes_only", 0): [ - ("supported_mediatypes_only", 0)], - ("namespaces", 0): [ - ("namespaces", 0)], - ("namespaces", 1): [ - ("namespaces", 1)], - } - - def getFeature(self, name): - xname = _name_xform(name) - try: - return getattr(self._options, xname) - except AttributeError: - if name == "infoset": - options = self._options - return (options.datatype_normalization - and options.whitespace_in_element_content - and options.comments - and options.charset_overrides_xml_encoding - and not (options.namespace_declarations - or options.validate_if_schema - or options.create_entity_ref_nodes - or options.entities - or options.cdata_sections)) - raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) - - def parseURI(self, uri): - if self.entityResolver: - input = self.entityResolver.resolveEntity(None, uri) - else: - input = DOMEntityResolver().resolveEntity(None, uri) - return self.parse(input) - - def parse(self, input): - options = copy.copy(self._options) - options.filter = self.filter - options.errorHandler = self.errorHandler - fp = input.byteStream - if fp is None and options.systemId: - import urllib2 - fp = urllib2.urlopen(input.systemId) - return self._parse_bytestream(fp, options) - - def parseWithContext(self, input, cnode, action): - if action not in self._legal_actions: - raise ValueError("not a 
legal action") - raise NotImplementedError("Haven't written this yet...") - - def _parse_bytestream(self, stream, options): - import xml.dom.expatbuilder - builder = xml.dom.expatbuilder.makeBuilder(options) - return builder.parseFile(stream) - - -def _name_xform(name): - return name.lower().replace('-', '_') - - -class DOMEntityResolver(object): - __slots__ = '_opener', - - def resolveEntity(self, publicId, systemId): - assert systemId is not None - source = DOMInputSource() - source.publicId = publicId - source.systemId = systemId - source.byteStream = self._get_opener().open(systemId) - - # determine the encoding if the transport provided it - source.encoding = self._guess_media_encoding(source) - - # determine the base URI is we can - import posixpath, urlparse - parts = urlparse.urlparse(systemId) - scheme, netloc, path, params, query, fragment = parts - # XXX should we check the scheme here as well? - if path and not path.endswith("/"): - path = posixpath.dirname(path) + "/" - parts = scheme, netloc, path, params, query, fragment - source.baseURI = urlparse.urlunparse(parts) - - return source - - def _get_opener(self): - try: - return self._opener - except AttributeError: - self._opener = self._create_opener() - return self._opener - - def _create_opener(self): - import urllib2 - return urllib2.build_opener() - - def _guess_media_encoding(self, source): - info = source.byteStream.info() - if "Content-Type" in info: - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() - - -class DOMInputSource(object): - __slots__ = ('byteStream', 'characterStream', 'stringData', - 'encoding', 'publicId', 'systemId', 'baseURI') - - def __init__(self): - self.byteStream = None - self.characterStream = None - self.stringData = None - self.encoding = None - self.publicId = None - self.systemId = None - self.baseURI = None - - def _get_byteStream(self): - return self.byteStream - def _set_byteStream(self, byteStream): - 
self.byteStream = byteStream - - def _get_characterStream(self): - return self.characterStream - def _set_characterStream(self, characterStream): - self.characterStream = characterStream - - def _get_stringData(self): - return self.stringData - def _set_stringData(self, data): - self.stringData = data - - def _get_encoding(self): - return self.encoding - def _set_encoding(self, encoding): - self.encoding = encoding - - def _get_publicId(self): - return self.publicId - def _set_publicId(self, publicId): - self.publicId = publicId - - def _get_systemId(self): - return self.systemId - def _set_systemId(self, systemId): - self.systemId = systemId - - def _get_baseURI(self): - return self.baseURI - def _set_baseURI(self, uri): - self.baseURI = uri - - -class DOMBuilderFilter: - """Element filter which can be used to tailor construction of - a DOM instance. - """ - - # There's really no need for this class; concrete implementations - # should just implement the endElement() and startElement() - # methods as appropriate. Using this makes it easy to only - # implement one of them. - - FILTER_ACCEPT = 1 - FILTER_REJECT = 2 - FILTER_SKIP = 3 - FILTER_INTERRUPT = 4 - - whatToShow = NodeFilter.SHOW_ALL - - def _get_whatToShow(self): - return self.whatToShow - - def acceptNode(self, element): - return self.FILTER_ACCEPT - - def startContainer(self, element): - return self.FILTER_ACCEPT - -del NodeFilter - - -class DocumentLS: - """Mixin to create documents that conform to the load/save spec.""" - - async = False - - def _get_async(self): - return False - def _set_async(self, async): - if async: - raise xml.dom.NotSupportedErr( - "asynchronous document loading is not supported") - - def abort(self): - # What does it mean to "clear" a document? Does the - # documentElement disappear? 
- raise NotImplementedError( - "haven't figured out what this means yet") - - def load(self, uri): - raise NotImplementedError("haven't written this yet") - - def loadXML(self, source): - raise NotImplementedError("haven't written this yet") - - def saveXML(self, snode): - if snode is None: - snode = self - elif snode.ownerDocument is not self: - raise xml.dom.WrongDocumentErr() - return snode.toxml() - - -class DOMImplementationLS: - MODE_SYNCHRONOUS = 1 - MODE_ASYNCHRONOUS = 2 - - def createDOMBuilder(self, mode, schemaType): - if schemaType is not None: - raise xml.dom.NotSupportedErr( - "schemaType not yet supported") - if mode == self.MODE_SYNCHRONOUS: - return DOMBuilder() - if mode == self.MODE_ASYNCHRONOUS: - raise xml.dom.NotSupportedErr( - "asynchronous builders are not supported") - raise ValueError("unknown value for mode") - - def createDOMWriter(self): - raise NotImplementedError( - "the writer interface hasn't been written yet!") - - def createDOMInputSource(self): - return DOMInputSource() diff --git a/python/Lib/xml/etree/ElementInclude.py b/python/Lib/xml/etree/ElementInclude.py deleted file mode 100755 index 7e29119fa5..0000000000 --- a/python/Lib/xml/etree/ElementInclude.py +++ /dev/null @@ -1,142 +0,0 @@ -# -# ElementTree -# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ -# -# limited xinclude support for element trees -# -# history: -# 2003-08-15 fl created -# 2003-11-14 fl fixed default loader -# -# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. 
-# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2008 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. - -## -# Limited XInclude support for the ElementTree package. -## - -import copy -from . import ElementTree - -XINCLUDE = "{http://www.w3.org/2001/XInclude}" - -XINCLUDE_INCLUDE = XINCLUDE + "include" -XINCLUDE_FALLBACK = XINCLUDE + "fallback" - -## -# Fatal include error. - -class FatalIncludeError(SyntaxError): - pass - -## -# Default loader. 
This loader reads an included resource from disk. -# -# @param href Resource reference. -# @param parse Parse mode. Either "xml" or "text". -# @param encoding Optional text encoding. -# @return The expanded resource. If the parse mode is "xml", this -# is an ElementTree instance. If the parse mode is "text", this -# is a Unicode string. If the loader fails, it can return None -# or raise an IOError exception. -# @throws IOError If the loader fails to load the resource. - -def default_loader(href, parse, encoding=None): - with open(href) as file: - if parse == "xml": - data = ElementTree.parse(file).getroot() - else: - data = file.read() - if encoding: - data = data.decode(encoding) - return data - -## -# Expand XInclude directives. -# -# @param elem Root element. -# @param loader Optional resource loader. If omitted, it defaults -# to {@link default_loader}. If given, it should be a callable -# that implements the same interface as default_loader. -# @throws FatalIncludeError If the function fails to include a given -# resource, or if the tree contains malformed XInclude elements. -# @throws IOError If the function fails to load a given resource. 
- -def include(elem, loader=None): - if loader is None: - loader = default_loader - # look for xinclude elements - i = 0 - while i < len(elem): - e = elem[i] - if e.tag == XINCLUDE_INCLUDE: - # process xinclude directive - href = e.get("href") - parse = e.get("parse", "xml") - if parse == "xml": - node = loader(href, parse) - if node is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - node = copy.copy(node) - if e.tail: - node.tail = (node.tail or "") + e.tail - elem[i] = node - elif parse == "text": - text = loader(href, parse, e.get("encoding")) - if text is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - if i: - node = elem[i-1] - node.tail = (node.tail or "") + text + (e.tail or "") - else: - elem.text = (elem.text or "") + text + (e.tail or "") - del elem[i] - continue - else: - raise FatalIncludeError( - "unknown parse type in xi:include tag (%r)" % parse - ) - elif e.tag == XINCLUDE_FALLBACK: - raise FatalIncludeError( - "xi:fallback tag must be child of xi:include (%r)" % e.tag - ) - else: - include(e, loader) - i = i + 1 diff --git a/python/Lib/xml/etree/ElementPath.py b/python/Lib/xml/etree/ElementPath.py deleted file mode 100755 index 4a626d799c..0000000000 --- a/python/Lib/xml/etree/ElementPath.py +++ /dev/null @@ -1,303 +0,0 @@ -# -# ElementTree -# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ -# -# limited xpath support for element trees -# -# history: -# 2003-05-23 fl created -# 2003-05-28 fl added support for // etc -# 2003-08-27 fl fixed parsing of periods in element names -# 2007-09-10 fl new selection engine -# 2007-09-12 fl fixed parent selector -# 2007-09-13 fl added iterfind; changed findall to return a list -# 2007-11-30 fl added namespaces support -# 2009-10-30 fl added child element value filter -# -# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 
-# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2009 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. - -## -# Implementation module for XPath support. There's usually no reason -# to import this module directly; the ElementTree does this for -# you, if needed. 
-## - -import re - -xpath_tokenizer_re = re.compile( - "(" - "'[^']*'|\"[^\"]*\"|" - "::|" - "//?|" - "\.\.|" - "\(\)|" - "[/.*:\[\]\(\)@=])|" - "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" - "\s+" - ) - -def xpath_tokenizer(pattern, namespaces=None): - for token in xpath_tokenizer_re.findall(pattern): - tag = token[1] - if tag and tag[0] != "{" and ":" in tag: - try: - prefix, uri = tag.split(":", 1) - if not namespaces: - raise KeyError - yield token[0], "{%s}%s" % (namespaces[prefix], uri) - except KeyError: - raise SyntaxError("prefix %r not found in prefix map" % prefix) - else: - yield token - -def get_parent_map(context): - parent_map = context.parent_map - if parent_map is None: - context.parent_map = parent_map = {} - for p in context.root.iter(): - for e in p: - parent_map[e] = p - return parent_map - -def prepare_child(next, token): - tag = token[1] - def select(context, result): - for elem in result: - for e in elem: - if e.tag == tag: - yield e - return select - -def prepare_star(next, token): - def select(context, result): - for elem in result: - for e in elem: - yield e - return select - -def prepare_self(next, token): - def select(context, result): - for elem in result: - yield elem - return select - -def prepare_descendant(next, token): - token = next() - if token[0] == "*": - tag = "*" - elif not token[0]: - tag = token[1] - else: - raise SyntaxError("invalid descendant") - def select(context, result): - for elem in result: - for e in elem.iter(tag): - if e is not elem: - yield e - return select - -def prepare_parent(next, token): - def select(context, result): - # FIXME: raise error if .. is applied at toplevel? - parent_map = get_parent_map(context) - result_map = {} - for elem in result: - if elem in parent_map: - parent = parent_map[elem] - if parent not in result_map: - result_map[parent] = None - yield parent - return select - -def prepare_predicate(next, token): - # FIXME: replace with real parser!!! 
refs: - # http://effbot.org/zone/simple-iterator-parser.htm - # http://javascript.crockford.com/tdop/tdop.html - signature = [] - predicate = [] - while 1: - token = next() - if token[0] == "]": - break - if token[0] and token[0][:1] in "'\"": - token = "'", token[0][1:-1] - signature.append(token[0] or "-") - predicate.append(token[1]) - signature = "".join(signature) - # use signature to determine predicate type - if signature == "@-": - # [@attribute] predicate - key = predicate[1] - def select(context, result): - for elem in result: - if elem.get(key) is not None: - yield elem - return select - if signature == "@-='": - # [@attribute='value'] - key = predicate[1] - value = predicate[-1] - def select(context, result): - for elem in result: - if elem.get(key) == value: - yield elem - return select - if signature == "-" and not re.match("\d+$", predicate[0]): - # [tag] - tag = predicate[0] - def select(context, result): - for elem in result: - if elem.find(tag) is not None: - yield elem - return select - if signature == "-='" and not re.match("\d+$", predicate[0]): - # [tag='value'] - tag = predicate[0] - value = predicate[-1] - def select(context, result): - for elem in result: - for e in elem.findall(tag): - if "".join(e.itertext()) == value: - yield elem - break - return select - if signature == "-" or signature == "-()" or signature == "-()-": - # [index] or [last()] or [last()-index] - if signature == "-": - index = int(predicate[0]) - 1 - else: - if predicate[0] != "last": - raise SyntaxError("unsupported function") - if signature == "-()-": - try: - index = int(predicate[2]) - 1 - except ValueError: - raise SyntaxError("unsupported expression") - else: - index = -1 - def select(context, result): - parent_map = get_parent_map(context) - for elem in result: - try: - parent = parent_map[elem] - # FIXME: what if the selector is "*" ? 
- elems = list(parent.findall(elem.tag)) - if elems[index] is elem: - yield elem - except (IndexError, KeyError): - pass - return select - raise SyntaxError("invalid predicate") - -ops = { - "": prepare_child, - "*": prepare_star, - ".": prepare_self, - "..": prepare_parent, - "//": prepare_descendant, - "[": prepare_predicate, - } - -_cache = {} - -class _SelectorContext: - parent_map = None - def __init__(self, root): - self.root = root - -# -------------------------------------------------------------------- - -## -# Generate all matching objects. - -def iterfind(elem, path, namespaces=None): - # compile selector pattern - if path[-1:] == "/": - path = path + "*" # implicit all (FIXME: keep this?) - try: - selector = _cache[path] - except KeyError: - if len(_cache) > 100: - _cache.clear() - if path[:1] == "/": - raise SyntaxError("cannot use absolute path on element") - next = iter(xpath_tokenizer(path, namespaces)).next - token = next() - selector = [] - while 1: - try: - selector.append(ops[token[0]](next, token)) - except StopIteration: - raise SyntaxError("invalid path") - try: - token = next() - if token[0] == "/": - token = next() - except StopIteration: - break - _cache[path] = selector - # execute selector pattern - result = [elem] - context = _SelectorContext(elem) - for select in selector: - result = select(context, result) - return result - -## -# Find first matching object. - -def find(elem, path, namespaces=None): - try: - return iterfind(elem, path, namespaces).next() - except StopIteration: - return None - -## -# Find all matching objects. - -def findall(elem, path, namespaces=None): - return list(iterfind(elem, path, namespaces)) - -## -# Find text for first matching object. 
- -def findtext(elem, path, default=None, namespaces=None): - try: - elem = iterfind(elem, path, namespaces).next() - return elem.text or "" - except StopIteration: - return default diff --git a/python/Lib/xml/etree/ElementTree.py b/python/Lib/xml/etree/ElementTree.py deleted file mode 100755 index cf6402f8c5..0000000000 --- a/python/Lib/xml/etree/ElementTree.py +++ /dev/null @@ -1,1678 +0,0 @@ -# -# ElementTree -# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ -# -# light-weight XML support for Python 2.3 and later. -# -# history (since 1.2.6): -# 2005-11-12 fl added tostringlist/fromstringlist helpers -# 2006-07-05 fl merged in selected changes from the 1.3 sandbox -# 2006-07-05 fl removed support for 2.1 and earlier -# 2007-06-21 fl added deprecation/future warnings -# 2007-08-25 fl added doctype hook, added parser version attribute etc -# 2007-08-26 fl added new serializer code (better namespace handling, etc) -# 2007-08-27 fl warn for broken /tag searches on tree level -# 2007-09-02 fl added html/text methods to serializer (experimental) -# 2007-09-05 fl added method argument to tostring/tostringlist -# 2007-09-06 fl improved error handling -# 2007-09-13 fl added itertext, iterfind; assorted cleanups -# 2007-12-15 fl added C14N hooks, copy method (experimental) -# -# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 
-# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2008 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. 
- -__all__ = [ - # public symbols - "Comment", - "dump", - "Element", "ElementTree", - "fromstring", "fromstringlist", - "iselement", "iterparse", - "parse", "ParseError", - "PI", "ProcessingInstruction", - "QName", - "SubElement", - "tostring", "tostringlist", - "TreeBuilder", - "VERSION", - "XML", - "XMLParser", "XMLTreeBuilder", - ] - -VERSION = "1.3.0" - -## -# The Element type is a flexible container object, designed to -# store hierarchical data structures in memory. The type can be -# described as a cross between a list and a dictionary. -#

-# Each element has a number of properties associated with it: -#

    -#
  • a tag. This is a string identifying what kind of data -# this element represents (the element type, in other words).
  • -#
  • a number of attributes, stored in a Python dictionary.
  • -#
  • a text string.
  • -#
  • an optional tail string.
  • -#
  • a number of child elements, stored in a Python sequence
  • -#
-# -# To create an element instance, use the {@link #Element} constructor -# or the {@link #SubElement} factory function. -#

-# The {@link #ElementTree} class can be used to wrap an element -# structure, and convert it from and to XML. -## - -import sys -import re -import warnings - - -class _SimpleElementPath(object): - # emulate pre-1.2 find/findtext/findall behaviour - def find(self, element, tag, namespaces=None): - for elem in element: - if elem.tag == tag: - return elem - return None - def findtext(self, element, tag, default=None, namespaces=None): - elem = self.find(element, tag) - if elem is None: - return default - return elem.text or "" - def iterfind(self, element, tag, namespaces=None): - if tag[:3] == ".//": - for elem in element.iter(tag[3:]): - yield elem - for elem in element: - if elem.tag == tag: - yield elem - def findall(self, element, tag, namespaces=None): - return list(self.iterfind(element, tag, namespaces)) - -try: - from . import ElementPath -except ImportError: - ElementPath = _SimpleElementPath() - -## -# Parser error. This is a subclass of SyntaxError. -#

-# In addition to the exception value, an exception instance contains a -# specific exception code in the code attribute, and the line and -# column of the error in the position attribute. - -class ParseError(SyntaxError): - pass - -# -------------------------------------------------------------------- - -## -# Checks if an object appears to be a valid element object. -# -# @param An element instance. -# @return A true value if this is an element object. -# @defreturn flag - -def iselement(element): - # FIXME: not sure about this; might be a better idea to look - # for tag/attrib/text attributes - return isinstance(element, Element) or hasattr(element, "tag") - -## -# Element class. This class defines the Element interface, and -# provides a reference implementation of this interface. -#

-# The element name, attribute names, and attribute values can be -# either ASCII strings (ordinary Python strings containing only 7-bit -# ASCII characters) or Unicode strings. -# -# @param tag The element name. -# @param attrib An optional dictionary, containing element attributes. -# @param **extra Additional attributes, given as keyword arguments. -# @see Element -# @see SubElement -# @see Comment -# @see ProcessingInstruction - -class Element(object): - # text...tail - - ## - # (Attribute) Element tag. - - tag = None - - ## - # (Attribute) Element attribute dictionary. Where possible, use - # {@link #Element.get}, - # {@link #Element.set}, - # {@link #Element.keys}, and - # {@link #Element.items} to access - # element attributes. - - attrib = None - - ## - # (Attribute) Text before first subelement. This is either a - # string or the value None. Note that if there was no text, this - # attribute may be either None or an empty string, depending on - # the parser. - - text = None - - ## - # (Attribute) Text after this element's end tag, but before the - # next sibling element's start tag. This is either a string or - # the value None. Note that if there was no text, this attribute - # may be either None or an empty string, depending on the parser. - - tail = None # text after end tag, if any - - # constructor - - def __init__(self, tag, attrib={}, **extra): - attrib = attrib.copy() - attrib.update(extra) - self.tag = tag - self.attrib = attrib - self._children = [] - - def __repr__(self): - return "" % (repr(self.tag), id(self)) - - ## - # Creates a new element object of the same type as this element. - # - # @param tag Element tag. - # @param attrib Element attributes, given as a dictionary. - # @return A new element instance. - - def makeelement(self, tag, attrib): - return self.__class__(tag, attrib) - - ## - # (Experimental) Copies the current element. This creates a - # shallow copy; subelements will be shared with the original tree. 
- # - # @return A new element instance. - - def copy(self): - elem = self.makeelement(self.tag, self.attrib) - elem.text = self.text - elem.tail = self.tail - elem[:] = self - return elem - - ## - # Returns the number of subelements. Note that this only counts - # full elements; to check if there's any content in an element, you - # have to check both the length and the text attribute. - # - # @return The number of subelements. - - def __len__(self): - return len(self._children) - - def __nonzero__(self): - warnings.warn( - "The behavior of this method will change in future versions. " - "Use specific 'len(elem)' or 'elem is not None' test instead.", - FutureWarning, stacklevel=2 - ) - return len(self._children) != 0 # emulate old behaviour, for now - - ## - # Returns the given subelement, by index. - # - # @param index What subelement to return. - # @return The given subelement. - # @exception IndexError If the given element does not exist. - - def __getitem__(self, index): - return self._children[index] - - ## - # Replaces the given subelement, by index. - # - # @param index What subelement to replace. - # @param element The new element value. - # @exception IndexError If the given element does not exist. - - def __setitem__(self, index, element): - # if isinstance(index, slice): - # for elt in element: - # assert iselement(elt) - # else: - # assert iselement(element) - self._children[index] = element - - ## - # Deletes the given subelement, by index. - # - # @param index What subelement to delete. - # @exception IndexError If the given element does not exist. - - def __delitem__(self, index): - del self._children[index] - - ## - # Adds a subelement to the end of this element. In document order, - # the new element will appear after the last existing subelement (or - # directly after the text, if it's the first subelement), but before - # the end tag for this element. - # - # @param element The element to add. 
- - def append(self, element): - # assert iselement(element) - self._children.append(element) - - ## - # Appends subelements from a sequence. - # - # @param elements A sequence object with zero or more elements. - # @since 1.3 - - def extend(self, elements): - # for element in elements: - # assert iselement(element) - self._children.extend(elements) - - ## - # Inserts a subelement at the given position in this element. - # - # @param index Where to insert the new subelement. - - def insert(self, index, element): - # assert iselement(element) - self._children.insert(index, element) - - ## - # Removes a matching subelement. Unlike the find methods, - # this method compares elements based on identity, not on tag - # value or contents. To remove subelements by other means, the - # easiest way is often to use a list comprehension to select what - # elements to keep, and use slice assignment to update the parent - # element. - # - # @param element What element to remove. - # @exception ValueError If a matching element could not be found. - - def remove(self, element): - # assert iselement(element) - self._children.remove(element) - - ## - # (Deprecated) Returns all subelements. The elements are returned - # in document order. - # - # @return A list of subelements. - # @defreturn list of Element instances - - def getchildren(self): - warnings.warn( - "This method will be removed in future versions. " - "Use 'list(elem)' or iteration over elem instead.", - DeprecationWarning, stacklevel=2 - ) - return self._children - - ## - # Finds the first matching subelement, by tag name or path. - # - # @param path What element to look for. - # @keyparam namespaces Optional namespace prefix map. - # @return The first matching element, or None if no element was found. - # @defreturn Element or None - - def find(self, path, namespaces=None): - return ElementPath.find(self, path, namespaces) - - ## - # Finds text for the first matching subelement, by tag name or path. 
- # - # @param path What element to look for. - # @param default What to return if the element was not found. - # @keyparam namespaces Optional namespace prefix map. - # @return The text content of the first matching element, or the - # default value no element was found. Note that if the element - # is found, but has no text content, this method returns an - # empty string. - # @defreturn string - - def findtext(self, path, default=None, namespaces=None): - return ElementPath.findtext(self, path, default, namespaces) - - ## - # Finds all matching subelements, by tag name or path. - # - # @param path What element to look for. - # @keyparam namespaces Optional namespace prefix map. - # @return A list or other sequence containing all matching elements, - # in document order. - # @defreturn list of Element instances - - def findall(self, path, namespaces=None): - return ElementPath.findall(self, path, namespaces) - - ## - # Finds all matching subelements, by tag name or path. - # - # @param path What element to look for. - # @keyparam namespaces Optional namespace prefix map. - # @return An iterator or sequence containing all matching elements, - # in document order. - # @defreturn a generated sequence of Element instances - - def iterfind(self, path, namespaces=None): - return ElementPath.iterfind(self, path, namespaces) - - ## - # Resets an element. This function removes all subelements, clears - # all attributes, and sets the text and tail attributes - # to None. - - def clear(self): - self.attrib.clear() - self._children = [] - self.text = self.tail = None - - ## - # Gets an element attribute. Equivalent to attrib.get, but - # some implementations may handle this a bit more efficiently. - # - # @param key What attribute to look for. - # @param default What to return if the attribute was not found. - # @return The attribute value, or the default value, if the - # attribute was not found. 
- # @defreturn string or None - - def get(self, key, default=None): - return self.attrib.get(key, default) - - ## - # Sets an element attribute. Equivalent to attrib[key] = value, - # but some implementations may handle this a bit more efficiently. - # - # @param key What attribute to set. - # @param value The attribute value. - - def set(self, key, value): - self.attrib[key] = value - - ## - # Gets a list of attribute names. The names are returned in an - # arbitrary order (just like for an ordinary Python dictionary). - # Equivalent to attrib.keys(). - # - # @return A list of element attribute names. - # @defreturn list of strings - - def keys(self): - return self.attrib.keys() - - ## - # Gets element attributes, as a sequence. The attributes are - # returned in an arbitrary order. Equivalent to attrib.items(). - # - # @return A list of (name, value) tuples for all attributes. - # @defreturn list of (string, string) tuples - - def items(self): - return self.attrib.items() - - ## - # Creates a tree iterator. The iterator loops over this element - # and all subelements, in document order, and returns all elements - # with a matching tag. - #

- # If the tree structure is modified during iteration, new or removed - # elements may or may not be included. To get a stable set, use the - # list() function on the iterator, and loop over the resulting list. - # - # @param tag What tags to look for (default is to return all elements). - # @return An iterator containing all the matching elements. - # @defreturn iterator - - def iter(self, tag=None): - if tag == "*": - tag = None - if tag is None or self.tag == tag: - yield self - for e in self._children: - for e in e.iter(tag): - yield e - - # compatibility - def getiterator(self, tag=None): - # Change for a DeprecationWarning in 1.4 - warnings.warn( - "This method will be removed in future versions. " - "Use 'elem.iter()' or 'list(elem.iter())' instead.", - PendingDeprecationWarning, stacklevel=2 - ) - return list(self.iter(tag)) - - ## - # Creates a text iterator. The iterator loops over this element - # and all subelements, in document order, and returns all inner - # text. - # - # @return An iterator containing all inner text. - # @defreturn iterator - - def itertext(self): - tag = self.tag - if not isinstance(tag, basestring) and tag is not None: - return - if self.text: - yield self.text - for e in self: - for s in e.itertext(): - yield s - if e.tail: - yield e.tail - -# compatibility -_Element = _ElementInterface = Element - -## -# Subelement factory. This function creates an element instance, and -# appends it to an existing element. -#

-# The element name, attribute names, and attribute values can be -# either 8-bit ASCII strings or Unicode strings. -# -# @param parent The parent element. -# @param tag The subelement name. -# @param attrib An optional dictionary, containing element attributes. -# @param **extra Additional attributes, given as keyword arguments. -# @return An element instance. -# @defreturn Element - -def SubElement(parent, tag, attrib={}, **extra): - attrib = attrib.copy() - attrib.update(extra) - element = parent.makeelement(tag, attrib) - parent.append(element) - return element - -## -# Comment element factory. This factory function creates a special -# element that will be serialized as an XML comment by the standard -# serializer. -#

-# The comment string can be either an 8-bit ASCII string or a Unicode -# string. -# -# @param text A string containing the comment string. -# @return An element instance, representing a comment. -# @defreturn Element - -def Comment(text=None): - element = Element(Comment) - element.text = text - return element - -## -# PI element factory. This factory function creates a special element -# that will be serialized as an XML processing instruction by the standard -# serializer. -# -# @param target A string containing the PI target. -# @param text A string containing the PI contents, if any. -# @return An element instance, representing a PI. -# @defreturn Element - -def ProcessingInstruction(target, text=None): - element = Element(ProcessingInstruction) - element.text = target - if text: - element.text = element.text + " " + text - return element - -PI = ProcessingInstruction - -## -# QName wrapper. This can be used to wrap a QName attribute value, in -# order to get proper namespace handling on output. -# -# @param text A string containing the QName value, in the form {uri}local, -# or, if the tag argument is given, the URI part of a QName. -# @param tag Optional tag. If given, the first argument is interpreted as -# a URI, and this argument is interpreted as a local name. -# @return An opaque object, representing the QName. - -class QName(object): - def __init__(self, text_or_uri, tag=None): - if tag: - text_or_uri = "{%s}%s" % (text_or_uri, tag) - self.text = text_or_uri - def __str__(self): - return self.text - def __hash__(self): - return hash(self.text) - def __cmp__(self, other): - if isinstance(other, QName): - return cmp(self.text, other.text) - return cmp(self.text, other) - -# -------------------------------------------------------------------- - -## -# ElementTree wrapper class. This class represents an entire element -# hierarchy, and adds some extra support for serialization to and from -# standard XML. -# -# @param element Optional root element. 
-# @keyparam file Optional file handle or file name. If given, the -# tree is initialized with the contents of this XML file. - -class ElementTree(object): - - def __init__(self, element=None, file=None): - # assert element is None or iselement(element) - self._root = element # first node - if file: - self.parse(file) - - ## - # Gets the root element for this tree. - # - # @return An element instance. - # @defreturn Element - - def getroot(self): - return self._root - - ## - # Replaces the root element for this tree. This discards the - # current contents of the tree, and replaces it with the given - # element. Use with care. - # - # @param element An element instance. - - def _setroot(self, element): - # assert iselement(element) - self._root = element - - ## - # Loads an external XML document into this element tree. - # - # @param source A file name or file object. If a file object is - # given, it only has to implement a read(n) method. - # @keyparam parser An optional parser instance. If not given, the - # standard {@link XMLParser} parser is used. - # @return The document root element. - # @defreturn Element - # @exception ParseError If the parser fails to parse the document. - - def parse(self, source, parser=None): - close_source = False - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - try: - if not parser: - parser = XMLParser(target=TreeBuilder()) - while 1: - data = source.read(65536) - if not data: - break - parser.feed(data) - self._root = parser.close() - return self._root - finally: - if close_source: - source.close() - - ## - # Creates a tree iterator for the root element. The iterator loops - # over all elements in this tree, in document order. - # - # @param tag What tags to look for (default is to return all elements) - # @return An iterator. 
- # @defreturn iterator - - def iter(self, tag=None): - # assert self._root is not None - return self._root.iter(tag) - - # compatibility - def getiterator(self, tag=None): - # Change for a DeprecationWarning in 1.4 - warnings.warn( - "This method will be removed in future versions. " - "Use 'tree.iter()' or 'list(tree.iter())' instead.", - PendingDeprecationWarning, stacklevel=2 - ) - return list(self.iter(tag)) - - ## - # Same as getroot().find(path), starting at the root of the - # tree. - # - # @param path What element to look for. - # @keyparam namespaces Optional namespace prefix map. - # @return The first matching element, or None if no element was found. - # @defreturn Element or None - - def find(self, path, namespaces=None): - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.find(path, namespaces) - - ## - # Same as getroot().findtext(path), starting at the root of the tree. - # - # @param path What element to look for. - # @param default What to return if the element was not found. - # @keyparam namespaces Optional namespace prefix map. - # @return The text content of the first matching element, or the - # default value no element was found. Note that if the element - # is found, but has no text content, this method returns an - # empty string. - # @defreturn string - - def findtext(self, path, default=None, namespaces=None): - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. 
If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.findtext(path, default, namespaces) - - ## - # Same as getroot().findall(path), starting at the root of the tree. - # - # @param path What element to look for. - # @keyparam namespaces Optional namespace prefix map. - # @return A list or iterator containing all matching elements, - # in document order. - # @defreturn list of Element instances - - def findall(self, path, namespaces=None): - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.findall(path, namespaces) - - ## - # Finds all matching subelements, by tag name or path. - # Same as getroot().iterfind(path). - # - # @param path What element to look for. - # @keyparam namespaces Optional namespace prefix map. - # @return An iterator or sequence containing all matching elements, - # in document order. - # @defreturn a generated sequence of Element instances - - def iterfind(self, path, namespaces=None): - # assert self._root is not None - if path[:1] == "/": - path = "." + path - warnings.warn( - "This search is broken in 1.3 and earlier, and will be " - "fixed in a future version. If you rely on the current " - "behaviour, change it to %r" % path, - FutureWarning, stacklevel=2 - ) - return self._root.iterfind(path, namespaces) - - ## - # Writes the element tree to a file, as XML. - # - # @def write(file, **options) - # @param file A file name, or a file object opened for writing. - # @param **options Options, given as keyword arguments. - # @keyparam encoding Optional output encoding (default is US-ASCII). - # @keyparam xml_declaration Controls if an XML declaration should - # be added to the file. 
Use False for never, True for always, - # None for only if not US-ASCII or UTF-8. None is default. - # @keyparam default_namespace Sets the default XML namespace (for "xmlns"). - # @keyparam method Optional output method ("xml", "html", "text" or - # "c14n"; default is "xml"). - - def write(self, file_or_filename, - # keyword arguments - encoding=None, - xml_declaration=None, - default_namespace=None, - method=None): - # assert self._root is not None - if not method: - method = "xml" - elif method not in _serialize: - # FIXME: raise an ImportError for c14n if ElementC14N is missing? - raise ValueError("unknown method %r" % method) - if hasattr(file_or_filename, "write"): - file = file_or_filename - else: - file = open(file_or_filename, "wb") - write = file.write - if not encoding: - if method == "c14n": - encoding = "utf-8" - else: - encoding = "us-ascii" - elif xml_declaration or (xml_declaration is None and - encoding not in ("utf-8", "us-ascii")): - if method == "xml": - write("\n" % encoding) - if method == "text": - _serialize_text(write, self._root, encoding) - else: - qnames, namespaces = _namespaces( - self._root, encoding, default_namespace - ) - serialize = _serialize[method] - serialize(write, self._root, encoding, qnames, namespaces) - if file_or_filename is not file: - file.close() - - def write_c14n(self, file): - # lxml.etree compatibility. 
use output method instead - return self.write(file, method="c14n") - -# -------------------------------------------------------------------- -# serialization support - -def _namespaces(elem, encoding, default_namespace=None): - # identify namespaces used in this tree - - # maps qnames to *encoded* prefix:local names - qnames = {None: None} - - # maps uri:s to prefixes - namespaces = {} - if default_namespace: - namespaces[default_namespace] = "" - - def encode(text): - return text.encode(encoding) - - def add_qname(qname): - # calculate serialized qname representation - try: - if qname[:1] == "{": - uri, tag = qname[1:].rsplit("}", 1) - prefix = namespaces.get(uri) - if prefix is None: - prefix = _namespace_map.get(uri) - if prefix is None: - prefix = "ns%d" % len(namespaces) - if prefix != "xml": - namespaces[uri] = prefix - if prefix: - qnames[qname] = encode("%s:%s" % (prefix, tag)) - else: - qnames[qname] = encode(tag) # default element - else: - if default_namespace: - # FIXME: can this be handled in XML 1.0? 
- raise ValueError( - "cannot use non-qualified names with " - "default_namespace option" - ) - qnames[qname] = encode(qname) - except TypeError: - _raise_serialization_error(qname) - - # populate qname and namespaces table - try: - iterate = elem.iter - except AttributeError: - iterate = elem.getiterator # cET compatibility - for elem in iterate(): - tag = elem.tag - if isinstance(tag, QName): - if tag.text not in qnames: - add_qname(tag.text) - elif isinstance(tag, basestring): - if tag not in qnames: - add_qname(tag) - elif tag is not None and tag is not Comment and tag is not PI: - _raise_serialization_error(tag) - for key, value in elem.items(): - if isinstance(key, QName): - key = key.text - if key not in qnames: - add_qname(key) - if isinstance(value, QName) and value.text not in qnames: - add_qname(value.text) - text = elem.text - if isinstance(text, QName) and text.text not in qnames: - add_qname(text.text) - return qnames, namespaces - -def _serialize_xml(write, elem, encoding, qnames, namespaces): - tag = elem.tag - text = elem.text - if tag is Comment: - write("" % _encode(text, encoding)) - elif tag is ProcessingInstruction: - write("" % _encode(text, encoding)) - else: - tag = qnames[tag] - if tag is None: - if text: - write(_escape_cdata(text, encoding)) - for e in elem: - _serialize_xml(write, e, encoding, qnames, None) - else: - write("<" + tag) - items = elem.items() - if items or namespaces: - if namespaces: - for v, k in sorted(namespaces.items(), - key=lambda x: x[1]): # sort on prefix - if k: - k = ":" + k - write(" xmlns%s=\"%s\"" % ( - k.encode(encoding), - _escape_attrib(v, encoding) - )) - for k, v in sorted(items): # lexical order - if isinstance(k, QName): - k = k.text - if isinstance(v, QName): - v = qnames[v.text] - else: - v = _escape_attrib(v, encoding) - write(" %s=\"%s\"" % (qnames[k], v)) - if text or len(elem): - write(">") - if text: - write(_escape_cdata(text, encoding)) - for e in elem: - _serialize_xml(write, e, encoding, 
qnames, None) - write("") - else: - write(" />") - if elem.tail: - write(_escape_cdata(elem.tail, encoding)) - -HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", - "img", "input", "isindex", "link", "meta", "param") - -try: - HTML_EMPTY = set(HTML_EMPTY) -except NameError: - pass - -def _serialize_html(write, elem, encoding, qnames, namespaces): - tag = elem.tag - text = elem.text - if tag is Comment: - write("" % _escape_cdata(text, encoding)) - elif tag is ProcessingInstruction: - write("" % _escape_cdata(text, encoding)) - else: - tag = qnames[tag] - if tag is None: - if text: - write(_escape_cdata(text, encoding)) - for e in elem: - _serialize_html(write, e, encoding, qnames, None) - else: - write("<" + tag) - items = elem.items() - if items or namespaces: - if namespaces: - for v, k in sorted(namespaces.items(), - key=lambda x: x[1]): # sort on prefix - if k: - k = ":" + k - write(" xmlns%s=\"%s\"" % ( - k.encode(encoding), - _escape_attrib(v, encoding) - )) - for k, v in sorted(items): # lexical order - if isinstance(k, QName): - k = k.text - if isinstance(v, QName): - v = qnames[v.text] - else: - v = _escape_attrib_html(v, encoding) - # FIXME: handle boolean attributes - write(" %s=\"%s\"" % (qnames[k], v)) - write(">") - ltag = tag.lower() - if text: - if ltag == "script" or ltag == "style": - write(_encode(text, encoding)) - else: - write(_escape_cdata(text, encoding)) - for e in elem: - _serialize_html(write, e, encoding, qnames, None) - if ltag not in HTML_EMPTY: - write("") - if elem.tail: - write(_escape_cdata(elem.tail, encoding)) - -def _serialize_text(write, elem, encoding): - for part in elem.itertext(): - write(part.encode(encoding)) - if elem.tail: - write(elem.tail.encode(encoding)) - -_serialize = { - "xml": _serialize_xml, - "html": _serialize_html, - "text": _serialize_text, -# this optional method is imported at the end of the module -# "c14n": _serialize_c14n, -} - -## -# Registers a namespace prefix. 
The registry is global, and any -# existing mapping for either the given prefix or the namespace URI -# will be removed. -# -# @param prefix Namespace prefix. -# @param uri Namespace uri. Tags and attributes in this namespace -# will be serialized with the given prefix, if at all possible. -# @exception ValueError If the prefix is reserved, or is otherwise -# invalid. - -def register_namespace(prefix, uri): - if re.match("ns\d+$", prefix): - raise ValueError("Prefix format reserved for internal use") - for k, v in _namespace_map.items(): - if k == uri or v == prefix: - del _namespace_map[k] - _namespace_map[uri] = prefix - -_namespace_map = { - # "well-known" namespace prefixes - "http://www.w3.org/XML/1998/namespace": "xml", - "http://www.w3.org/1999/xhtml": "html", - "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", - "http://schemas.xmlsoap.org/wsdl/": "wsdl", - # xml schema - "http://www.w3.org/2001/XMLSchema": "xs", - "http://www.w3.org/2001/XMLSchema-instance": "xsi", - # dublin core - "http://purl.org/dc/elements/1.1/": "dc", -} - -def _raise_serialization_error(text): - raise TypeError( - "cannot serialize %r (type %s)" % (text, type(text).__name__) - ) - -def _encode(text, encoding): - try: - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_cdata(text, encoding): - # escape character data - try: - # it's worth avoiding do-nothing calls for strings that are - # shorter than 500 character, or so. assume that's, by far, - # the most common case in most applications. 
- if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_attrib(text, encoding): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - if "\n" in text: - text = text.replace("\n", " ") - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_attrib_html(text, encoding): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - -# -------------------------------------------------------------------- - -## -# Generates a string representation of an XML element, including all -# subelements. -# -# @param element An Element instance. -# @keyparam encoding Optional output encoding (default is US-ASCII). -# @keyparam method Optional output method ("xml", "html", "text" or -# "c14n"; default is "xml"). -# @return An encoded string containing the XML data. -# @defreturn string - -def tostring(element, encoding=None, method=None): - class dummy: - pass - data = [] - file = dummy() - file.write = data.append - ElementTree(element).write(file, encoding, method=method) - return "".join(data) - -## -# Generates a string representation of an XML element, including all -# subelements. The string is returned as a sequence of string fragments. -# -# @param element An Element instance. 
-# @keyparam encoding Optional output encoding (default is US-ASCII). -# @keyparam method Optional output method ("xml", "html", "text" or -# "c14n"; default is "xml"). -# @return A sequence object containing the XML data. -# @defreturn sequence -# @since 1.3 - -def tostringlist(element, encoding=None, method=None): - class dummy: - pass - data = [] - file = dummy() - file.write = data.append - ElementTree(element).write(file, encoding, method=method) - # FIXME: merge small fragments into larger parts - return data - -## -# Writes an element tree or element structure to sys.stdout. This -# function should be used for debugging only. -#

-# The exact output format is implementation dependent. In this -# version, it's written as an ordinary XML file. -# -# @param elem An element tree or an individual element. - -def dump(elem): - # debugging - if not isinstance(elem, ElementTree): - elem = ElementTree(elem) - elem.write(sys.stdout) - tail = elem.getroot().tail - if not tail or tail[-1] != "\n": - sys.stdout.write("\n") - -# -------------------------------------------------------------------- -# parsing - -## -# Parses an XML document into an element tree. -# -# @param source A filename or file object containing XML data. -# @param parser An optional parser instance. If not given, the -# standard {@link XMLParser} parser is used. -# @return An ElementTree instance - -def parse(source, parser=None): - tree = ElementTree() - tree.parse(source, parser) - return tree - -## -# Parses an XML document into an element tree incrementally, and reports -# what's going on to the user. -# -# @param source A filename or file object containing XML data. -# @param events A list of events to report back. If omitted, only "end" -# events are reported. -# @param parser An optional parser instance. If not given, the -# standard {@link XMLParser} parser is used. -# @return A (event, elem) iterator. 
- -def iterparse(source, events=None, parser=None): - close_source = False - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - try: - if not parser: - parser = XMLParser(target=TreeBuilder()) - return _IterParseIterator(source, events, parser, close_source) - except: - if close_source: - source.close() - raise - -class _IterParseIterator(object): - - def __init__(self, source, events, parser, close_source=False): - self._file = source - self._close_file = close_source - self._events = [] - self._index = 0 - self._error = None - self.root = self._root = None - self._parser = parser - # wire up the parser for event reporting - parser = self._parser._parser - append = self._events.append - if events is None: - events = ["end"] - for event in events: - if event == "start": - try: - parser.ordered_attributes = 1 - parser.specified_attributes = 1 - def handler(tag, attrib_in, event=event, append=append, - start=self._parser._start_list): - append((event, start(tag, attrib_in))) - parser.StartElementHandler = handler - except AttributeError: - def handler(tag, attrib_in, event=event, append=append, - start=self._parser._start): - append((event, start(tag, attrib_in))) - parser.StartElementHandler = handler - elif event == "end": - def handler(tag, event=event, append=append, - end=self._parser._end): - append((event, end(tag))) - parser.EndElementHandler = handler - elif event == "start-ns": - def handler(prefix, uri, event=event, append=append): - try: - uri = (uri or "").encode("ascii") - except UnicodeError: - pass - append((event, (prefix or "", uri or ""))) - parser.StartNamespaceDeclHandler = handler - elif event == "end-ns": - def handler(prefix, event=event, append=append): - append((event, None)) - parser.EndNamespaceDeclHandler = handler - else: - raise ValueError("unknown event %r" % event) - - def next(self): - try: - while 1: - try: - item = self._events[self._index] - self._index += 1 - return item - except IndexError: - 
pass - if self._error: - e = self._error - self._error = None - raise e - if self._parser is None: - self.root = self._root - break - # load event buffer - del self._events[:] - self._index = 0 - data = self._file.read(16384) - if data: - try: - self._parser.feed(data) - except SyntaxError as exc: - self._error = exc - else: - self._root = self._parser.close() - self._parser = None - except: - if self._close_file: - self._file.close() - raise - if self._close_file: - self._file.close() - raise StopIteration - - def __iter__(self): - return self - -## -# Parses an XML document from a string constant. This function can -# be used to embed "XML literals" in Python code. -# -# @param source A string containing XML data. -# @param parser An optional parser instance. If not given, the -# standard {@link XMLParser} parser is used. -# @return An Element instance. -# @defreturn Element - -def XML(text, parser=None): - if not parser: - parser = XMLParser(target=TreeBuilder()) - parser.feed(text) - return parser.close() - -## -# Parses an XML document from a string constant, and also returns -# a dictionary which maps from element id:s to elements. -# -# @param source A string containing XML data. -# @param parser An optional parser instance. If not given, the -# standard {@link XMLParser} parser is used. -# @return A tuple containing an Element instance and a dictionary. -# @defreturn (Element, dictionary) - -def XMLID(text, parser=None): - if not parser: - parser = XMLParser(target=TreeBuilder()) - parser.feed(text) - tree = parser.close() - ids = {} - for elem in tree.iter(): - id = elem.get("id") - if id: - ids[id] = elem - return tree, ids - -## -# Parses an XML document from a string constant. Same as {@link #XML}. -# -# @def fromstring(text) -# @param source A string containing XML data. -# @return An Element instance. -# @defreturn Element - -fromstring = XML - -## -# Parses an XML document from a sequence of string fragments. 
-# -# @param sequence A list or other sequence containing XML data fragments. -# @param parser An optional parser instance. If not given, the -# standard {@link XMLParser} parser is used. -# @return An Element instance. -# @defreturn Element -# @since 1.3 - -def fromstringlist(sequence, parser=None): - if not parser: - parser = XMLParser(target=TreeBuilder()) - for text in sequence: - parser.feed(text) - return parser.close() - -# -------------------------------------------------------------------- - -## -# Generic element structure builder. This builder converts a sequence -# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link -# #TreeBuilder.end} method calls to a well-formed element structure. -#

-# You can use this class to build an element structure using a custom XML -# parser, or a parser for some other XML-like format. -# -# @param element_factory Optional element factory. This factory -# is called to create new Element instances, as necessary. - -class TreeBuilder(object): - - def __init__(self, element_factory=None): - self._data = [] # data collector - self._elem = [] # element stack - self._last = None # last element - self._tail = None # true if we're after an end tag - if element_factory is None: - element_factory = Element - self._factory = element_factory - - ## - # Flushes the builder buffers, and returns the toplevel document - # element. - # - # @return An Element instance. - # @defreturn Element - - def close(self): - assert len(self._elem) == 0, "missing end tags" - assert self._last is not None, "missing toplevel element" - return self._last - - def _flush(self): - if self._data: - if self._last is not None: - text = "".join(self._data) - if self._tail: - assert self._last.tail is None, "internal error (tail)" - self._last.tail = text - else: - assert self._last.text is None, "internal error (text)" - self._last.text = text - self._data = [] - - ## - # Adds text to the current element. - # - # @param data A string. This should be either an 8-bit string - # containing ASCII text, or a Unicode string. - - def data(self, data): - self._data.append(data) - - ## - # Opens a new element. - # - # @param tag The element name. - # @param attrib A dictionary containing element attributes. - # @return The opened element. - # @defreturn Element - - def start(self, tag, attrs): - self._flush() - self._last = elem = self._factory(tag, attrs) - if self._elem: - self._elem[-1].append(elem) - self._elem.append(elem) - self._tail = 0 - return elem - - ## - # Closes the current element. - # - # @param tag The element name. - # @return The closed element. 
- # @defreturn Element - - def end(self, tag): - self._flush() - self._last = self._elem.pop() - assert self._last.tag == tag,\ - "end tag mismatch (expected %s, got %s)" % ( - self._last.tag, tag) - self._tail = 1 - return self._last - -## -# Element structure builder for XML source data, based on the -# expat parser. -# -# @keyparam target Target object. If omitted, the builder uses an -# instance of the standard {@link #TreeBuilder} class. -# @keyparam html Predefine HTML entities. This flag is not supported -# by the current implementation. -# @keyparam encoding Optional encoding. If given, the value overrides -# the encoding specified in the XML file. -# @see #ElementTree -# @see #TreeBuilder - -class XMLParser(object): - - def __init__(self, html=0, target=None, encoding=None): - try: - from xml.parsers import expat - except ImportError: - try: - import pyexpat as expat - except ImportError: - raise ImportError( - "No module named expat; use SimpleXMLTreeBuilder instead" - ) - parser = expat.ParserCreate(encoding, "}") - if target is None: - target = TreeBuilder() - # underscored names are provided for compatibility only - self.parser = self._parser = parser - self.target = self._target = target - self._error = expat.error - self._names = {} # name memo cache - # callbacks - parser.DefaultHandlerExpand = self._default - parser.StartElementHandler = self._start - parser.EndElementHandler = self._end - parser.CharacterDataHandler = self._data - # optional callbacks - parser.CommentHandler = self._comment - parser.ProcessingInstructionHandler = self._pi - # let expat do the buffering, if supported - try: - self._parser.buffer_text = 1 - except AttributeError: - pass - # use new-style attribute handling, if supported - try: - self._parser.ordered_attributes = 1 - self._parser.specified_attributes = 1 - parser.StartElementHandler = self._start_list - except AttributeError: - pass - self._doctype = None - self.entity = {} - try: - self.version = "Expat %d.%d.%d" % 
expat.version_info - except AttributeError: - pass # unknown - - def _raiseerror(self, value): - err = ParseError(value) - err.code = value.code - err.position = value.lineno, value.offset - raise err - - def _fixtext(self, text): - # convert text string to ascii, if possible - try: - return text.encode("ascii") - except UnicodeError: - return text - - def _fixname(self, key): - # expand qname, and convert name string to ascii, if possible - try: - name = self._names[key] - except KeyError: - name = key - if "}" in name: - name = "{" + name - self._names[key] = name = self._fixtext(name) - return name - - def _start(self, tag, attrib_in): - fixname = self._fixname - fixtext = self._fixtext - tag = fixname(tag) - attrib = {} - for key, value in attrib_in.items(): - attrib[fixname(key)] = fixtext(value) - return self.target.start(tag, attrib) - - def _start_list(self, tag, attrib_in): - fixname = self._fixname - fixtext = self._fixtext - tag = fixname(tag) - attrib = {} - if attrib_in: - for i in range(0, len(attrib_in), 2): - attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1]) - return self.target.start(tag, attrib) - - def _data(self, text): - return self.target.data(self._fixtext(text)) - - def _end(self, tag): - return self.target.end(self._fixname(tag)) - - def _comment(self, data): - try: - comment = self.target.comment - except AttributeError: - pass - else: - return comment(self._fixtext(data)) - - def _pi(self, target, data): - try: - pi = self.target.pi - except AttributeError: - pass - else: - return pi(self._fixtext(target), self._fixtext(data)) - - def _default(self, text): - prefix = text[:1] - if prefix == "&": - # deal with undefined entities - try: - self.target.data(self.entity[text[1:-1]]) - except KeyError: - from xml.parsers import expat - err = expat.error( - "undefined entity %s: line %d, column %d" % - (text, self._parser.ErrorLineNumber, - self._parser.ErrorColumnNumber) - ) - err.code = 11 # XML_ERROR_UNDEFINED_ENTITY - err.lineno = 
self._parser.ErrorLineNumber - err.offset = self._parser.ErrorColumnNumber - raise err - elif prefix == "<" and text[:9] == "": - self._doctype = None - return - text = text.strip() - if not text: - return - self._doctype.append(text) - n = len(self._doctype) - if n > 2: - type = self._doctype[1] - if type == "PUBLIC" and n == 4: - name, type, pubid, system = self._doctype - elif type == "SYSTEM" and n == 3: - name, type, system = self._doctype - pubid = None - else: - return - if pubid: - pubid = pubid[1:-1] - if hasattr(self.target, "doctype"): - self.target.doctype(name, pubid, system[1:-1]) - elif self.doctype is not self._XMLParser__doctype: - # warn about deprecated call - self._XMLParser__doctype(name, pubid, system[1:-1]) - self.doctype(name, pubid, system[1:-1]) - self._doctype = None - - ## - # (Deprecated) Handles a doctype declaration. - # - # @param name Doctype name. - # @param pubid Public identifier. - # @param system System identifier. - - def doctype(self, name, pubid, system): - """This method of XMLParser is deprecated.""" - warnings.warn( - "This method of XMLParser is deprecated. Define doctype() " - "method on the TreeBuilder target.", - DeprecationWarning, - ) - - # sentinel, if doctype is redefined in a subclass - __doctype = doctype - - ## - # Feeds data to the parser. - # - # @param data Encoded data. - - def feed(self, data): - try: - self._parser.Parse(data, 0) - except self._error, v: - self._raiseerror(v) - - ## - # Finishes feeding data to the parser. - # - # @return An element structure. - # @defreturn Element - - def close(self): - try: - self._parser.Parse("", 1) # end of data - except self._error, v: - self._raiseerror(v) - tree = self.target.close() - del self.target, self._parser # get rid of circular references - return tree - -# compatibility -XMLTreeBuilder = XMLParser - -# workaround circular import. 
-try: - from ElementC14N import _serialize_c14n - _serialize["c14n"] = _serialize_c14n -except ImportError: - pass diff --git a/python/Lib/xml/etree/__init__.py b/python/Lib/xml/etree/__init__.py deleted file mode 100755 index 27fd8f6d4e..0000000000 --- a/python/Lib/xml/etree/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $ -# elementtree package - -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2008 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/psf/license for licensing details. 
diff --git a/python/Lib/xml/etree/cElementTree.py b/python/Lib/xml/etree/cElementTree.py deleted file mode 100755 index a6f127abd5..0000000000 --- a/python/Lib/xml/etree/cElementTree.py +++ /dev/null @@ -1,3 +0,0 @@ -# Wrapper module for _elementtree - -from _elementtree import * diff --git a/python/Lib/xml/parsers/__init__.py b/python/Lib/xml/parsers/__init__.py deleted file mode 100755 index eb314a3b40..0000000000 --- a/python/Lib/xml/parsers/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Python interfaces to XML parsers. - -This package contains one module: - -expat -- Python wrapper for James Clark's Expat parser, with namespace - support. - -""" diff --git a/python/Lib/xml/parsers/expat.py b/python/Lib/xml/parsers/expat.py deleted file mode 100755 index 00b5c78ec0..0000000000 --- a/python/Lib/xml/parsers/expat.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Interface to the Expat non-validating XML parser.""" -__version__ = '$Revision: 17640 $' - -from pyexpat import * diff --git a/python/Lib/xml/sax/__init__.py b/python/Lib/xml/sax/__init__.py deleted file mode 100755 index 005b66e38a..0000000000 --- a/python/Lib/xml/sax/__init__.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Simple API for XML (SAX) implementation for Python. - -This module provides an implementation of the SAX 2 interface; -information about the Java version of the interface can be found at -http://www.megginson.com/SAX/. The Python version of the interface is -documented at <...>. - -This package contains the following modules: - -handler -- Base classes and constants which define the SAX 2 API for - the 'client-side' of SAX for Python. - -saxutils -- Implementation of the convenience classes commonly used to - work with SAX. - -xmlreader -- Base classes and constants which define the SAX 2 API for - the parsers used with SAX for Python. - -expatreader -- Driver that allows use of the Expat parser with SAX. 
-""" - -from xmlreader import InputSource -from handler import ContentHandler, ErrorHandler -from _exceptions import SAXException, SAXNotRecognizedException, \ - SAXParseException, SAXNotSupportedException, \ - SAXReaderNotAvailable - - -def parse(source, handler, errorHandler=ErrorHandler()): - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - parser.parse(source) - -def parseString(string, handler, errorHandler=ErrorHandler()): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - - if errorHandler is None: - errorHandler = ErrorHandler() - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - - inpsrc = InputSource() - inpsrc.setByteStream(StringIO(string)) - parser.parse(inpsrc) - -# this is the parser list used by the make_parser function if no -# alternatives are given as parameters to the function - -default_parser_list = ["xml.sax.expatreader"] - -# tell modulefinder that importing sax potentially imports expatreader -_false = 0 -if _false: - import xml.sax.expatreader - -import os, sys -if "PY_SAX_PARSER" in os.environ: - default_parser_list = os.environ["PY_SAX_PARSER"].split(",") -del os - -_key = "python.xml.sax.parser" -if sys.platform[:4] == "java" and sys.registry.containsKey(_key): - default_parser_list = sys.registry.getProperty(_key).split(",") - - -def make_parser(parser_list = []): - """Creates and returns a SAX parser. - - Creates the first parser it is able to instantiate of the ones - given in the list created by doing parser_list + - default_parser_list. 
The lists must contain the names of Python - modules containing both a SAX parser and a create_parser function.""" - - for parser_name in parser_list + default_parser_list: - try: - return _create_parser(parser_name) - except ImportError,e: - import sys - if parser_name in sys.modules: - # The parser module was found, but importing it - # failed unexpectedly, pass this exception through - raise - except SAXReaderNotAvailable: - # The parser module detected that it won't work properly, - # so try the next one - pass - - raise SAXReaderNotAvailable("No parsers found", None) - -# --- Internal utility methods used by make_parser - -if sys.platform[ : 4] == "java": - def _create_parser(parser_name): - from org.python.core import imp - drv_module = imp.importName(parser_name, 0, globals()) - return drv_module.create_parser() - -else: - def _create_parser(parser_name): - drv_module = __import__(parser_name,{},{},['create_parser']) - return drv_module.create_parser() - -del sys diff --git a/python/Lib/xml/sax/_exceptions.py b/python/Lib/xml/sax/_exceptions.py deleted file mode 100755 index a9b2ba35c6..0000000000 --- a/python/Lib/xml/sax/_exceptions.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Different kinds of SAX Exceptions""" -import sys -if sys.platform[:4] == "java": - from java.lang import Exception -del sys - -# ===== SAXEXCEPTION ===== - -class SAXException(Exception): - """Encapsulate an XML error or warning. This class can contain - basic error or warning information from either the XML parser or - the application: you can subclass it to provide additional - functionality, or to add localization. Note that although you will - receive a SAXException as the argument to the handlers in the - ErrorHandler interface, you are not actually required to raise - the exception; instead, you can simply read the information in - it.""" - - def __init__(self, msg, exception=None): - """Creates an exception. 
The message is required, but the exception - is optional.""" - self._msg = msg - self._exception = exception - Exception.__init__(self, msg) - - def getMessage(self): - "Return a message for this exception." - return self._msg - - def getException(self): - "Return the embedded exception, or None if there was none." - return self._exception - - def __str__(self): - "Create a string representation of the exception." - return self._msg - - def __getitem__(self, ix): - """Avoids weird error messages if someone does exception[ix] by - mistake, since Exception has __getitem__ defined.""" - raise AttributeError("__getitem__") - - -# ===== SAXPARSEEXCEPTION ===== - -class SAXParseException(SAXException): - """Encapsulate an XML parse error or warning. - - This exception will include information for locating the error in - the original XML document. Note that although the application will - receive a SAXParseException as the argument to the handlers in the - ErrorHandler interface, the application is not actually required - to raise the exception; instead, it can simply read the - information in it and take a different action. - - Since this exception is a subclass of SAXException, it inherits - the ability to wrap another exception.""" - - def __init__(self, msg, exception, locator): - "Creates the exception. The exception parameter is allowed to be None." - SAXException.__init__(self, msg, exception) - self._locator = locator - - # We need to cache this stuff at construction time. - # If this exception is raised, the objects through which we must - # traverse to get this information may be deleted by the time - # it gets caught. 
- self._systemId = self._locator.getSystemId() - self._colnum = self._locator.getColumnNumber() - self._linenum = self._locator.getLineNumber() - - def getColumnNumber(self): - """The column number of the end of the text where the exception - occurred.""" - return self._colnum - - def getLineNumber(self): - "The line number of the end of the text where the exception occurred." - return self._linenum - - def getPublicId(self): - "Get the public identifier of the entity where the exception occurred." - return self._locator.getPublicId() - - def getSystemId(self): - "Get the system identifier of the entity where the exception occurred." - return self._systemId - - def __str__(self): - "Create a string representation of the exception." - sysid = self.getSystemId() - if sysid is None: - sysid = "" - linenum = self.getLineNumber() - if linenum is None: - linenum = "?" - colnum = self.getColumnNumber() - if colnum is None: - colnum = "?" - return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg) - - -# ===== SAXNOTRECOGNIZEDEXCEPTION ===== - -class SAXNotRecognizedException(SAXException): - """Exception class for an unrecognized identifier. - - An XMLReader will raise this exception when it is confronted with an - unrecognized feature or property. SAX applications and extensions may - use this class for similar purposes.""" - - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXNotSupportedException(SAXException): - """Exception class for an unsupported operation. - - An XMLReader will raise this exception when a service it cannot - perform is requested (specifically setting a state or value). SAX - applications and extensions may use this class for similar - purposes.""" - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXReaderNotAvailable(SAXNotSupportedException): - """Exception class for a missing driver. - - An XMLReader module (driver) should raise this exception when it - is first imported, e.g. when a support module cannot be imported. 
- It also may be raised during parsing, e.g. if executing an external - program is not permitted.""" diff --git a/python/Lib/xml/sax/expatreader.py b/python/Lib/xml/sax/expatreader.py deleted file mode 100755 index 21c9db91e9..0000000000 --- a/python/Lib/xml/sax/expatreader.py +++ /dev/null @@ -1,430 +0,0 @@ -""" -SAX driver for the pyexpat C module. This driver works with -pyexpat.__version__ == '2.22'. -""" - -version = "0.20" - -from xml.sax._exceptions import * -from xml.sax.handler import feature_validation, feature_namespaces -from xml.sax.handler import feature_namespace_prefixes -from xml.sax.handler import feature_external_ges, feature_external_pes -from xml.sax.handler import feature_string_interning -from xml.sax.handler import property_xml_string, property_interning_dict - -# xml.parsers.expat does not raise ImportError in Jython -import sys -if sys.platform[:4] == "java": - raise SAXReaderNotAvailable("expat not available in Java", None) -del sys - -try: - from xml.parsers import expat -except ImportError: - raise SAXReaderNotAvailable("expat not supported", None) -else: - if not hasattr(expat, "ParserCreate"): - raise SAXReaderNotAvailable("expat not supported", None) -from xml.sax import xmlreader, saxutils, handler - -AttributesImpl = xmlreader.AttributesImpl -AttributesNSImpl = xmlreader.AttributesNSImpl - -# If we're using a sufficiently recent version of Python, we can use -# weak references to avoid cycles between the parser and content -# handler, otherwise we'll just have to pretend. -try: - import _weakref -except ImportError: - def _mkproxy(o): - return o -else: - import weakref - _mkproxy = weakref.proxy - del weakref, _weakref - -class _ClosedParser: - pass - -# --- ExpatLocator - -class ExpatLocator(xmlreader.Locator): - """Locator for use with the ExpatParser class. - - This uses a weak reference to the parser object to avoid creating - a circular reference between the parser and the content handler. 
- """ - def __init__(self, parser): - self._ref = _mkproxy(parser) - - def getColumnNumber(self): - parser = self._ref - if parser._parser is None: - return None - return parser._parser.ErrorColumnNumber - - def getLineNumber(self): - parser = self._ref - if parser._parser is None: - return 1 - return parser._parser.ErrorLineNumber - - def getPublicId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getPublicId() - - def getSystemId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getSystemId() - - -# --- ExpatParser - -class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): - """SAX driver for the pyexpat C module.""" - - def __init__(self, namespaceHandling=0, bufsize=2**16-20): - xmlreader.IncrementalParser.__init__(self, bufsize) - self._source = xmlreader.InputSource() - self._parser = None - self._namespaces = namespaceHandling - self._lex_handler_prop = None - self._parsing = 0 - self._entity_stack = [] - self._external_ges = 1 - self._interning = None - - # XMLReader methods - - def parse(self, source): - "Parse an XML document from a URL or an InputSource." 
- source = saxutils.prepare_input_source(source) - - self._source = source - self.reset() - self._cont_handler.setDocumentLocator(ExpatLocator(self)) - xmlreader.IncrementalParser.parse(self, source) - - def prepareParser(self, source): - if source.getSystemId() is not None: - base = source.getSystemId() - if isinstance(base, unicode): - base = base.encode('utf-8') - self._parser.SetBase(base) - - # Redefined setContentHandler to allow changing handlers during parsing - - def setContentHandler(self, handler): - xmlreader.IncrementalParser.setContentHandler(self, handler) - if self._parsing: - self._reset_cont_handler() - - def getFeature(self, name): - if name == feature_namespaces: - return self._namespaces - elif name == feature_string_interning: - return self._interning is not None - elif name in (feature_validation, feature_external_pes, - feature_namespace_prefixes): - return 0 - elif name == feature_external_ges: - return self._external_ges - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - if self._parsing: - raise SAXNotSupportedException("Cannot set features while parsing") - - if name == feature_namespaces: - self._namespaces = state - elif name == feature_external_ges: - self._external_ges = state - elif name == feature_string_interning: - if state: - if self._interning is None: - self._interning = {} - else: - self._interning = None - elif name == feature_validation: - if state: - raise SAXNotSupportedException( - "expat does not support validation") - elif name == feature_external_pes: - if state: - raise SAXNotSupportedException( - "expat does not read external parameter entities") - elif name == feature_namespace_prefixes: - if state: - raise SAXNotSupportedException( - "expat does not report namespace prefixes") - else: - raise SAXNotRecognizedException( - "Feature '%s' not recognized" % name) - - def getProperty(self, name): - if name == handler.property_lexical_handler: - return 
self._lex_handler_prop - elif name == property_interning_dict: - return self._interning - elif name == property_xml_string: - if self._parser: - if hasattr(self._parser, "GetInputContext"): - return self._parser.GetInputContext() - else: - raise SAXNotRecognizedException( - "This version of expat does not support getting" - " the XML string") - else: - raise SAXNotSupportedException( - "XML string cannot be returned when not parsing") - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - if name == handler.property_lexical_handler: - self._lex_handler_prop = value - if self._parsing: - self._reset_lex_handler_prop() - elif name == property_interning_dict: - self._interning = value - elif name == property_xml_string: - raise SAXNotSupportedException("Property '%s' cannot be set" % - name) - else: - raise SAXNotRecognizedException("Property '%s' not recognized" % - name) - - # IncrementalParser methods - - def feed(self, data, isFinal = 0): - if not self._parsing: - self.reset() - self._parsing = 1 - self._cont_handler.startDocument() - - try: - # The isFinal parameter is internal to the expat reader. - # If it is set to true, expat will check validity of the entire - # document. When feeding chunks, they are not normally final - - # except when invoked from close. - self._parser.Parse(data, isFinal) - except expat.error, e: - exc = SAXParseException(expat.ErrorString(e.code), e, self) - # FIXME: when to invoke error()? 
- self._err_handler.fatalError(exc) - - def close(self): - if (self._entity_stack or self._parser is None or - isinstance(self._parser, _ClosedParser)): - # If we are completing an external entity, do nothing here - return - try: - self.feed("", isFinal = 1) - self._cont_handler.endDocument() - self._parsing = 0 - # break cycle created by expat handlers pointing to our methods - self._parser = None - finally: - self._parsing = 0 - if self._parser is not None: - # Keep ErrorColumnNumber and ErrorLineNumber after closing. - parser = _ClosedParser() - parser.ErrorColumnNumber = self._parser.ErrorColumnNumber - parser.ErrorLineNumber = self._parser.ErrorLineNumber - self._parser = parser - - def _reset_cont_handler(self): - self._parser.ProcessingInstructionHandler = \ - self._cont_handler.processingInstruction - self._parser.CharacterDataHandler = self._cont_handler.characters - - def _reset_lex_handler_prop(self): - lex = self._lex_handler_prop - parser = self._parser - if lex is None: - parser.CommentHandler = None - parser.StartCdataSectionHandler = None - parser.EndCdataSectionHandler = None - parser.StartDoctypeDeclHandler = None - parser.EndDoctypeDeclHandler = None - else: - parser.CommentHandler = lex.comment - parser.StartCdataSectionHandler = lex.startCDATA - parser.EndCdataSectionHandler = lex.endCDATA - parser.StartDoctypeDeclHandler = self.start_doctype_decl - parser.EndDoctypeDeclHandler = lex.endDTD - - def reset(self): - if self._namespaces: - self._parser = expat.ParserCreate(self._source.getEncoding(), " ", - intern=self._interning) - self._parser.namespace_prefixes = 1 - self._parser.StartElementHandler = self.start_element_ns - self._parser.EndElementHandler = self.end_element_ns - else: - self._parser = expat.ParserCreate(self._source.getEncoding(), - intern = self._interning) - self._parser.StartElementHandler = self.start_element - self._parser.EndElementHandler = self.end_element - - self._reset_cont_handler() - 
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl - self._parser.NotationDeclHandler = self.notation_decl - self._parser.StartNamespaceDeclHandler = self.start_namespace_decl - self._parser.EndNamespaceDeclHandler = self.end_namespace_decl - - self._decl_handler_prop = None - if self._lex_handler_prop: - self._reset_lex_handler_prop() -# self._parser.DefaultHandler = -# self._parser.DefaultHandlerExpand = -# self._parser.NotStandaloneHandler = - self._parser.ExternalEntityRefHandler = self.external_entity_ref - try: - self._parser.SkippedEntityHandler = self.skipped_entity_handler - except AttributeError: - # This pyexpat does not support SkippedEntity - pass - self._parser.SetParamEntityParsing( - expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) - - self._parsing = 0 - self._entity_stack = [] - - # Locator methods - - def getColumnNumber(self): - if self._parser is None: - return None - return self._parser.ErrorColumnNumber - - def getLineNumber(self): - if self._parser is None: - return 1 - return self._parser.ErrorLineNumber - - def getPublicId(self): - return self._source.getPublicId() - - def getSystemId(self): - return self._source.getSystemId() - - # event handlers - def start_element(self, name, attrs): - self._cont_handler.startElement(name, AttributesImpl(attrs)) - - def end_element(self, name): - self._cont_handler.endElement(name) - - def start_element_ns(self, name, attrs): - pair = name.split() - if len(pair) == 1: - # no namespace - pair = (None, name) - elif len(pair) == 3: - pair = pair[0], pair[1] - else: - # default namespace - pair = tuple(pair) - - newattrs = {} - qnames = {} - for (aname, value) in attrs.items(): - parts = aname.split() - length = len(parts) - if length == 1: - # no namespace - qname = aname - apair = (None, aname) - elif length == 3: - qname = "%s:%s" % (parts[2], parts[1]) - apair = parts[0], parts[1] - else: - # default namespace - qname = parts[1] - apair = tuple(parts) - - newattrs[apair] = value - 
qnames[apair] = qname - - self._cont_handler.startElementNS(pair, None, - AttributesNSImpl(newattrs, qnames)) - - def end_element_ns(self, name): - pair = name.split() - if len(pair) == 1: - pair = (None, name) - elif len(pair) == 3: - pair = pair[0], pair[1] - else: - pair = tuple(pair) - - self._cont_handler.endElementNS(pair, None) - - # this is not used (call directly to ContentHandler) - def processing_instruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - # this is not used (call directly to ContentHandler) - def character_data(self, data): - self._cont_handler.characters(data) - - def start_namespace_decl(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def end_namespace_decl(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): - self._lex_handler_prop.startDTD(name, pubid, sysid) - - def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): - self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) - - def notation_decl(self, name, base, sysid, pubid): - self._dtd_handler.notationDecl(name, pubid, sysid) - - def external_entity_ref(self, context, base, sysid, pubid): - if not self._external_ges: - return 1 - - source = self._ent_handler.resolveEntity(pubid, sysid) - source = saxutils.prepare_input_source(source, - self._source.getSystemId() or - "") - - self._entity_stack.append((self._parser, self._source)) - self._parser = self._parser.ExternalEntityParserCreate(context) - self._source = source - - try: - xmlreader.IncrementalParser.parse(self, source) - except: - return 0 # FIXME: save error info here? 
- - (self._parser, self._source) = self._entity_stack[-1] - del self._entity_stack[-1] - return 1 - - def skipped_entity_handler(self, name, is_pe): - if is_pe: - # The SAX spec requires to report skipped PEs with a '%' - name = '%'+name - self._cont_handler.skippedEntity(name) - -# --- - -def create_parser(*args, **kwargs): - return ExpatParser(*args, **kwargs) - -# --- - -if __name__ == "__main__": - import xml.sax.saxutils - p = create_parser() - p.setContentHandler(xml.sax.saxutils.XMLGenerator()) - p.setErrorHandler(xml.sax.ErrorHandler()) - p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml") diff --git a/python/Lib/xml/sax/handler.py b/python/Lib/xml/sax/handler.py deleted file mode 100755 index f9e91b6d47..0000000000 --- a/python/Lib/xml/sax/handler.py +++ /dev/null @@ -1,342 +0,0 @@ -""" -This module contains the core classes of version 2.0 of SAX for Python. -This file provides only default classes with absolutely minimum -functionality, from which drivers and applications can be subclassed. - -Many of these classes are empty and are included only as documentation -of the interfaces. - -$Id$ -""" - -version = '2.0beta' - -#============================================================================ -# -# HANDLER INTERFACES -# -#============================================================================ - -# ===== ERRORHANDLER ===== - -class ErrorHandler: - """Basic interface for SAX error handlers. - - If you create an object that implements this interface, then - register the object with your XMLReader, the parser will call the - methods in your object to report all warnings and errors. There - are three levels of errors available: warnings, (possibly) - recoverable errors, and unrecoverable errors. All methods take a - SAXParseException as the only parameter.""" - - def error(self, exception): - "Handle a recoverable error." - raise exception - - def fatalError(self, exception): - "Handle a non-recoverable error." 
- raise exception - - def warning(self, exception): - "Handle a warning." - print exception - - -# ===== CONTENTHANDLER ===== - -class ContentHandler: - """Interface for receiving logical document content events. - - This is the main callback interface in SAX, and the one most - important to applications. The order of events in this interface - mirrors the order of the information in the document.""" - - def __init__(self): - self._locator = None - - def setDocumentLocator(self, locator): - """Called by the parser to give the application a locator for - locating the origin of document events. - - SAX parsers are strongly encouraged (though not absolutely - required) to supply a locator: if it does so, it must supply - the locator to the application by invoking this method before - invoking any of the other methods in the DocumentHandler - interface. - - The locator allows the application to determine the end - position of any document-related event, even if the parser is - not reporting an error. Typically, the application will use - this information for reporting its own errors (such as - character content that does not match an application's - business rules). The information returned by the locator is - probably not sufficient for use with a search engine. - - Note that the locator will return correct information only - during the invocation of the events in this interface. The - application should not attempt to use it at any other time.""" - self._locator = locator - - def startDocument(self): - """Receive notification of the beginning of a document. - - The SAX parser will invoke this method only once, before any - other methods in this interface or in DTDHandler (except for - setDocumentLocator).""" - - def endDocument(self): - """Receive notification of the end of a document. - - The SAX parser will invoke this method only once, and it will - be the last method invoked during the parse. 
The parser shall - not invoke this method until it has either abandoned parsing - (because of an unrecoverable error) or reached the end of - input.""" - - def startPrefixMapping(self, prefix, uri): - """Begin the scope of a prefix-URI Namespace mapping. - - The information from this event is not necessary for normal - Namespace processing: the SAX XML reader will automatically - replace prefixes for element and attribute names when the - http://xml.org/sax/features/namespaces feature is true (the - default). - - There are cases, however, when applications need to use - prefixes in character data or in attribute values, where they - cannot safely be expanded automatically; the - start/endPrefixMapping event supplies the information to the - application to expand prefixes in those contexts itself, if - necessary. - - Note that start/endPrefixMapping events are not guaranteed to - be properly nested relative to each-other: all - startPrefixMapping events will occur before the corresponding - startElement event, and all endPrefixMapping events will occur - after the corresponding endElement event, but their order is - not guaranteed.""" - - def endPrefixMapping(self, prefix): - """End the scope of a prefix-URI mapping. - - See startPrefixMapping for details. This event will always - occur after the corresponding endElement event, but the order - of endPrefixMapping events is not otherwise guaranteed.""" - - def startElement(self, name, attrs): - """Signals the start of an element in non-namespace mode. - - The name parameter contains the raw XML 1.0 name of the - element type as a string and the attrs parameter holds an - instance of the Attributes class containing the attributes of - the element.""" - - def endElement(self, name): - """Signals the end of an element in non-namespace mode. 
- - The name parameter contains the name of the element type, just - as with the startElement event.""" - - def startElementNS(self, name, qname, attrs): - """Signals the start of an element in namespace mode. - - The name parameter contains the name of the element type as a - (uri, localname) tuple, the qname parameter the raw XML 1.0 - name used in the source document, and the attrs parameter - holds an instance of the Attributes class containing the - attributes of the element. - - The uri part of the name tuple is None for elements which have - no namespace.""" - - def endElementNS(self, name, qname): - """Signals the end of an element in namespace mode. - - The name parameter contains the name of the element type, just - as with the startElementNS event.""" - - def characters(self, content): - """Receive notification of character data. - - The Parser will call this method to report each chunk of - character data. SAX parsers may return all contiguous - character data in a single chunk, or they may split it into - several chunks; however, all of the characters in any single - event must come from the same external entity so that the - Locator provides useful information.""" - - def ignorableWhitespace(self, whitespace): - """Receive notification of ignorable whitespace in element content. - - Validating Parsers must use this method to report each chunk - of ignorable whitespace (see the W3C XML 1.0 recommendation, - section 2.10): non-validating parsers may also use this method - if they are capable of parsing and using content models. - - SAX parsers may return all contiguous whitespace in a single - chunk, or they may split it into several chunks; however, all - of the characters in any single event must come from the same - external entity, so that the Locator provides useful - information.""" - - def processingInstruction(self, target, data): - """Receive notification of a processing instruction. 
- - The Parser will invoke this method once for each processing - instruction found: note that processing instructions may occur - before or after the main document element. - - A SAX parser should never report an XML declaration (XML 1.0, - section 2.8) or a text declaration (XML 1.0, section 4.3.1) - using this method.""" - - def skippedEntity(self, name): - """Receive notification of a skipped entity. - - The Parser will invoke this method once for each entity - skipped. Non-validating processors may skip entities if they - have not seen the declarations (because, for example, the - entity was declared in an external DTD subset). All processors - may skip external entities, depending on the values of the - http://xml.org/sax/features/external-general-entities and the - http://xml.org/sax/features/external-parameter-entities - properties.""" - - -# ===== DTDHandler ===== - -class DTDHandler: - """Handle DTD events. - - This interface specifies only those DTD events required for basic - parsing (unparsed entities and attributes).""" - - def notationDecl(self, name, publicId, systemId): - "Handle a notation declaration event." - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - "Handle an unparsed entity declaration event." - - -# ===== ENTITYRESOLVER ===== - -class EntityResolver: - """Basic interface for resolving entities. If you create an object - implementing this interface, then register the object with your - Parser, the parser will call the method in your object to - resolve all external entities. 
Note that DefaultHandler implements - this interface with the default behaviour.""" - - def resolveEntity(self, publicId, systemId): - """Resolve the system identifier of an entity and return either - the system identifier to read from as a string, or an InputSource - to read from.""" - return systemId - - -#============================================================================ -# -# CORE FEATURES -# -#============================================================================ - -feature_namespaces = "http://xml.org/sax/features/namespaces" -# true: Perform Namespace processing (default). -# false: Optionally do not perform Namespace processing -# (implies namespace-prefixes). -# access: (parsing) read-only; (not parsing) read/write - -feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" -# true: Report the original prefixed names and attributes used for Namespace -# declarations. -# false: Do not report attributes used for Namespace declarations, and -# optionally do not report original prefixed names (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_string_interning = "http://xml.org/sax/features/string-interning" -# true: All element names, prefixes, attribute names, Namespace URIs, and -# local names are interned using the built-in intern function. -# false: Names are not necessarily interned, although they may be (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_validation = "http://xml.org/sax/features/validation" -# true: Report all validation errors (implies external-general-entities and -# external-parameter-entities). -# false: Do not report validation errors. -# access: (parsing) read-only; (not parsing) read/write - -feature_external_ges = "http://xml.org/sax/features/external-general-entities" -# true: Include all external general (text) entities. -# false: Do not include external general entities. 
-# access: (parsing) read-only; (not parsing) read/write - -feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" -# true: Include all external parameter entities, including the external -# DTD subset. -# false: Do not include any external parameter entities, even the external -# DTD subset. -# access: (parsing) read-only; (not parsing) read/write - -all_features = [feature_namespaces, - feature_namespace_prefixes, - feature_string_interning, - feature_validation, - feature_external_ges, - feature_external_pes] - - -#============================================================================ -# -# CORE PROPERTIES -# -#============================================================================ - -property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" -# data type: xml.sax.sax2lib.LexicalHandler -# description: An optional extension handler for lexical events like comments. -# access: read/write - -property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" -# data type: xml.sax.sax2lib.DeclHandler -# description: An optional extension handler for DTD-related events other -# than notations and unparsed entities. -# access: read/write - -property_dom_node = "http://xml.org/sax/properties/dom-node" -# data type: org.w3c.dom.Node -# description: When parsing, the current DOM node being visited if this is -# a DOM iterator; when not parsing, the root DOM node for -# iteration. -# access: (parsing) read-only; (not parsing) read/write - -property_xml_string = "http://xml.org/sax/properties/xml-string" -# data type: String -# description: The literal string of characters that was the source for -# the current event. -# access: read-only - -property_encoding = "http://www.python.org/sax/properties/encoding" -# data type: String -# description: The name of the encoding to assume for input data. -# access: write: set the encoding, e.g. established by a higher-level -# protocol. 
May change during parsing (e.g. after -# processing a META tag) -# read: return the current encoding (possibly established through -# auto-detection. -# initial value: UTF-8 -# - -property_interning_dict = "http://www.python.org/sax/properties/interning-dict" -# data type: Dictionary -# description: The dictionary used to intern common strings in the document -# access: write: Request that the parser uses a specific dictionary, to -# allow interning across different documents -# read: return the current interning dictionary, or None -# - -all_properties = [property_lexical_handler, - property_dom_node, - property_declaration_handler, - property_xml_string, - property_encoding, - property_interning_dict] diff --git a/python/Lib/xml/sax/saxutils.py b/python/Lib/xml/sax/saxutils.py deleted file mode 100755 index 1b89e31aed..0000000000 --- a/python/Lib/xml/sax/saxutils.py +++ /dev/null @@ -1,353 +0,0 @@ -"""\ -A library of useful helper classes to the SAX classes, for the -convenience of application and driver writers. -""" - -import os, urlparse, urllib, types -import io -import sys -import handler -import xmlreader - -try: - _StringTypes = [types.StringType, types.UnicodeType] -except AttributeError: - _StringTypes = [types.StringType] - -def __dict_replace(s, d): - """Replace substrings of a string using a dictionary.""" - for key, value in d.items(): - s = s.replace(key, value) - return s - -def escape(data, entities={}): - """Escape &, <, and > in a string of data. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - - # must do ampersand first - data = data.replace("&", "&") - data = data.replace(">", ">") - data = data.replace("<", "<") - if entities: - data = __dict_replace(data, entities) - return data - -def unescape(data, entities={}): - """Unescape &, <, and > in a string of data. 
- - You can unescape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - data = data.replace("<", "<") - data = data.replace(">", ">") - if entities: - data = __dict_replace(data, entities) - # must do ampersand last - return data.replace("&", "&") - -def quoteattr(data, entities={}): - """Escape and quote an attribute value. - - Escape &, <, and > in a string of data, then quote it for use as - an attribute value. The \" character will be escaped as well, if - necessary. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - entities = entities.copy() - entities.update({'\n': ' ', '\r': ' ', '\t':' '}) - data = escape(data, entities) - if '"' in data: - if "'" in data: - data = '"%s"' % data.replace('"', """) - else: - data = "'%s'" % data - else: - data = '"%s"' % data - return data - - -def _gettextwriter(out, encoding): - if out is None: - import sys - out = sys.stdout - - if isinstance(out, io.RawIOBase): - buffer = io.BufferedIOBase(out) - # Keep the original file open when the TextIOWrapper is - # destroyed - buffer.close = lambda: None - else: - # This is to handle passed objects that aren't in the - # IOBase hierarchy, but just have a write method - buffer = io.BufferedIOBase() - buffer.writable = lambda: True - buffer.write = out.write - try: - # TextIOWrapper uses this methods to determine - # if BOM (for UTF-16, etc) should be added - buffer.seekable = out.seekable - buffer.tell = out.tell - except AttributeError: - pass - # wrap a binary writer with TextIOWrapper - return _UnbufferedTextIOWrapper(buffer, encoding=encoding, - errors='xmlcharrefreplace', - newline='\n') - - -class _UnbufferedTextIOWrapper(io.TextIOWrapper): - def write(self, s): - 
super(_UnbufferedTextIOWrapper, self).write(s) - self.flush() - - -class XMLGenerator(handler.ContentHandler): - - def __init__(self, out=None, encoding="iso-8859-1"): - handler.ContentHandler.__init__(self) - out = _gettextwriter(out, encoding) - self._write = out.write - self._flush = out.flush - self._ns_contexts = [{}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self._undeclared_ns_maps = [] - self._encoding = encoding - - def _qname(self, name): - """Builds a qualified name from a (ns_url, localname) pair""" - if name[0]: - # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is - # bound by definition to http://www.w3.org/XML/1998/namespace. It - # does not need to be declared and will not usually be found in - # self._current_context. - if 'http://www.w3.org/XML/1998/namespace' == name[0]: - return 'xml:' + name[1] - # The name is in a non-empty namespace - prefix = self._current_context[name[0]] - if prefix: - # If it is not the default namespace, prepend the prefix - return prefix + ":" + name[1] - # Return the unqualified name - return name[1] - - # ContentHandler methods - - def startDocument(self): - self._write(u'\n' % - self._encoding) - - def endDocument(self): - self._flush() - - def startPrefixMapping(self, prefix, uri): - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix - self._undeclared_ns_maps.append((prefix, uri)) - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts[-1] - del self._ns_contexts[-1] - - def startElement(self, name, attrs): - self._write(u'<' + name) - for (name, value) in attrs.items(): - self._write(u' %s=%s' % (name, quoteattr(value))) - self._write(u'>') - - def endElement(self, name): - self._write(u'' % name) - - def startElementNS(self, name, qname, attrs): - self._write(u'<' + self._qname(name)) - - for prefix, uri in self._undeclared_ns_maps: - if prefix: - self._write(u' xmlns:%s="%s"' % (prefix, uri)) 
- else: - self._write(u' xmlns="%s"' % uri) - self._undeclared_ns_maps = [] - - for (name, value) in attrs.items(): - self._write(u' %s=%s' % (self._qname(name), quoteattr(value))) - self._write(u'>') - - def endElementNS(self, name, qname): - self._write(u'' % self._qname(name)) - - def characters(self, content): - if not isinstance(content, unicode): - content = unicode(content, self._encoding) - self._write(escape(content)) - - def ignorableWhitespace(self, content): - if not isinstance(content, unicode): - content = unicode(content, self._encoding) - self._write(content) - - def processingInstruction(self, target, data): - self._write(u'' % (target, data)) - - -class XMLFilterBase(xmlreader.XMLReader): - """This class is designed to sit between an XMLReader and the - client application's event handlers. By default, it does nothing - but pass requests up to the reader and events on to the handlers - unmodified, but subclasses can override specific methods to modify - the event stream or the configuration requests as they pass - through.""" - - def __init__(self, parent = None): - xmlreader.XMLReader.__init__(self) - self._parent = parent - - # ErrorHandler methods - - def error(self, exception): - self._err_handler.error(exception) - - def fatalError(self, exception): - self._err_handler.fatalError(exception) - - def warning(self, exception): - self._err_handler.warning(exception) - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self._cont_handler.setDocumentLocator(locator) - - def startDocument(self): - self._cont_handler.startDocument() - - def endDocument(self): - self._cont_handler.endDocument() - - def startPrefixMapping(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def endPrefixMapping(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def startElement(self, name, attrs): - self._cont_handler.startElement(name, attrs) - - def endElement(self, name): - self._cont_handler.endElement(name) 
- - def startElementNS(self, name, qname, attrs): - self._cont_handler.startElementNS(name, qname, attrs) - - def endElementNS(self, name, qname): - self._cont_handler.endElementNS(name, qname) - - def characters(self, content): - self._cont_handler.characters(content) - - def ignorableWhitespace(self, chars): - self._cont_handler.ignorableWhitespace(chars) - - def processingInstruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - def skippedEntity(self, name): - self._cont_handler.skippedEntity(name) - - # DTDHandler methods - - def notationDecl(self, name, publicId, systemId): - self._dtd_handler.notationDecl(name, publicId, systemId) - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) - - # EntityResolver methods - - def resolveEntity(self, publicId, systemId): - return self._ent_handler.resolveEntity(publicId, systemId) - - # XMLReader methods - - def parse(self, source): - self._parent.setContentHandler(self) - self._parent.setErrorHandler(self) - self._parent.setEntityResolver(self) - self._parent.setDTDHandler(self) - self._parent.parse(source) - - def setLocale(self, locale): - self._parent.setLocale(locale) - - def getFeature(self, name): - return self._parent.getFeature(name) - - def setFeature(self, name, state): - self._parent.setFeature(name, state) - - def getProperty(self, name): - return self._parent.getProperty(name) - - def setProperty(self, name, value): - self._parent.setProperty(name, value) - - # XMLFilter methods - - def getParent(self): - return self._parent - - def setParent(self, parent): - self._parent = parent - -# --- Utility functions - -def prepare_input_source(source, base = ""): - """This function takes an InputSource and an optional base URL and - returns a fully resolved InputSource object ready for reading.""" - - if type(source) in _StringTypes: - source = xmlreader.InputSource(source) - elif 
hasattr(source, "read"): - f = source - source = xmlreader.InputSource() - source.setByteStream(f) - if hasattr(f, "name"): - source.setSystemId(f.name) - - if source.getByteStream() is None: - try: - sysid = source.getSystemId() - basehead = os.path.dirname(os.path.normpath(base)) - encoding = sys.getfilesystemencoding() - if isinstance(sysid, unicode): - if not isinstance(basehead, unicode): - try: - basehead = basehead.decode(encoding) - except UnicodeDecodeError: - sysid = sysid.encode(encoding) - else: - if isinstance(basehead, unicode): - try: - sysid = sysid.decode(encoding) - except UnicodeDecodeError: - basehead = basehead.encode(encoding) - sysidfilename = os.path.join(basehead, sysid) - isfile = os.path.isfile(sysidfilename) - except UnicodeError: - isfile = False - if isfile: - source.setSystemId(sysidfilename) - f = open(sysidfilename, "rb") - else: - source.setSystemId(urlparse.urljoin(base, source.getSystemId())) - f = urllib.urlopen(source.getSystemId()) - - source.setByteStream(f) - - return source diff --git a/python/Lib/xml/sax/xmlreader.py b/python/Lib/xml/sax/xmlreader.py deleted file mode 100755 index 74aa39a7f1..0000000000 --- a/python/Lib/xml/sax/xmlreader.py +++ /dev/null @@ -1,381 +0,0 @@ -"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers -should be based on this code. """ - -import handler - -from _exceptions import SAXNotSupportedException, SAXNotRecognizedException - - -# ===== XMLREADER ===== - -class XMLReader: - """Interface for reading an XML document using callbacks. - - XMLReader is the interface that an XML parser's SAX2 driver must - implement. This interface allows an application to set and query - features and properties in the parser, to register event handlers - for document processing, and to initiate a document parse. 
- - All SAX interfaces are assumed to be synchronous: the parse - methods must not return until parsing is complete, and readers - must wait for an event-handler callback to return before reporting - the next event.""" - - def __init__(self): - self._cont_handler = handler.ContentHandler() - self._dtd_handler = handler.DTDHandler() - self._ent_handler = handler.EntityResolver() - self._err_handler = handler.ErrorHandler() - - def parse(self, source): - "Parse an XML document from a system identifier or an InputSource." - raise NotImplementedError("This method must be implemented!") - - def getContentHandler(self): - "Returns the current ContentHandler." - return self._cont_handler - - def setContentHandler(self, handler): - "Registers a new object to receive document content events." - self._cont_handler = handler - - def getDTDHandler(self): - "Returns the current DTD handler." - return self._dtd_handler - - def setDTDHandler(self, handler): - "Register an object to receive basic DTD-related events." - self._dtd_handler = handler - - def getEntityResolver(self): - "Returns the current EntityResolver." - return self._ent_handler - - def setEntityResolver(self, resolver): - "Register an object to resolve external entities." - self._ent_handler = resolver - - def getErrorHandler(self): - "Returns the current ErrorHandler." - return self._err_handler - - def setErrorHandler(self, handler): - "Register an object to receive error-message events." - self._err_handler = handler - - def setLocale(self, locale): - """Allow an application to set the locale for errors and warnings. - - SAX parsers are not required to provide localization for errors - and warnings; if they cannot support the requested locale, - however, they must raise a SAX exception. Applications may - request a locale change in the middle of a parse.""" - raise SAXNotSupportedException("Locale support not implemented") - - def getFeature(self, name): - "Looks up and returns the state of a SAX2 feature." 
- raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - "Sets the state of a SAX2 feature." - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def getProperty(self, name): - "Looks up and returns the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - "Sets the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - -class IncrementalParser(XMLReader): - """This interface adds three extra methods to the XMLReader - interface that allow XML parsers to support incremental - parsing. Support for this interface is optional, since not all - underlying XML parsers support this functionality. - - When the parser is instantiated it is ready to begin accepting - data from the feed method immediately. After parsing has been - finished with a call to close the reset method must be called to - make the parser ready to accept new data, either from feed or - using the parse method. - - Note that these methods must _not_ be called during parsing, that - is, after parse has been called and before it returns. - - By default, the class also implements the parse method of the XMLReader - interface using the feed, close and reset methods of the - IncrementalParser interface as a convenience to SAX 2.0 driver - writers.""" - - def __init__(self, bufsize=2**16): - self._bufsize = bufsize - XMLReader.__init__(self) - - def parse(self, source): - import saxutils - source = saxutils.prepare_input_source(source) - - self.prepareParser(source) - file = source.getByteStream() - buffer = file.read(self._bufsize) - while buffer != "": - self.feed(buffer) - buffer = file.read(self._bufsize) - self.close() - - def feed(self, data): - """This method gives the raw XML data in the data parameter to - the parser and makes it parse the data, emitting the - corresponding events. 
It is allowed for XML constructs to be - split across several calls to feed. - - feed may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def prepareParser(self, source): - """This method is called by the parse implementation to allow - the SAX 2.0 driver to prepare itself for parsing.""" - raise NotImplementedError("prepareParser must be overridden!") - - def close(self): - """This method is called when the entire XML document has been - passed to the parser through the feed method, to notify the - parser that there are no more data. This allows the parser to - do the final checks on the document and empty the internal - data buffer. - - The parser will not be ready to parse another document until - the reset method has been called. - - close may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def reset(self): - """This method is called after close has been called to reset - the parser so that it is ready to parse new documents. The - results of calling parse or feed after close without calling - reset are undefined.""" - raise NotImplementedError("This method must be implemented!") - -# ===== LOCATOR ===== - -class Locator: - """Interface for associating a SAX event with a document - location. A locator object will return valid results only during - calls to DocumentHandler methods; at any other time, the - results are unpredictable.""" - - def getColumnNumber(self): - "Return the column number where the current event ends." - return -1 - - def getLineNumber(self): - "Return the line number where the current event ends." - return -1 - - def getPublicId(self): - "Return the public identifier for the current event." - return None - - def getSystemId(self): - "Return the system identifier for the current event." - return None - -# ===== INPUTSOURCE ===== - -class InputSource: - """Encapsulation of the information needed by the XMLReader to - read entities. 
- - This class may include information about the public identifier, - system identifier, byte stream (possibly with character encoding - information) and/or the character stream of an entity. - - Applications will create objects of this class for use in the - XMLReader.parse method and for returning from - EntityResolver.resolveEntity. - - An InputSource belongs to the application, the XMLReader is not - allowed to modify InputSource objects passed to it from the - application, although it may make copies and modify those.""" - - def __init__(self, system_id = None): - self.__system_id = system_id - self.__public_id = None - self.__encoding = None - self.__bytefile = None - self.__charfile = None - - def setPublicId(self, public_id): - "Sets the public identifier of this InputSource." - self.__public_id = public_id - - def getPublicId(self): - "Returns the public identifier of this InputSource." - return self.__public_id - - def setSystemId(self, system_id): - "Sets the system identifier of this InputSource." - self.__system_id = system_id - - def getSystemId(self): - "Returns the system identifier of this InputSource." - return self.__system_id - - def setEncoding(self, encoding): - """Sets the character encoding of this InputSource. - - The encoding must be a string acceptable for an XML encoding - declaration (see section 4.3.3 of the XML recommendation). - - The encoding attribute of the InputSource is ignored if the - InputSource also contains a character stream.""" - self.__encoding = encoding - - def getEncoding(self): - "Get the character encoding of this InputSource." - return self.__encoding - - def setByteStream(self, bytefile): - """Set the byte stream (a Python file-like object which does - not perform byte-to-character conversion) for this input - source. - - The SAX parser will ignore this if there is also a character - stream specified, but it will use a byte stream in preference - to opening a URI connection itself. 
- - If the application knows the character encoding of the byte - stream, it should set it with the setEncoding method.""" - self.__bytefile = bytefile - - def getByteStream(self): - """Get the byte stream for this input source. - - The getEncoding method will return the character encoding for - this byte stream, or None if unknown.""" - return self.__bytefile - - def setCharacterStream(self, charfile): - """Set the character stream for this input source. (The stream - must be a Python 2.0 Unicode-wrapped file-like that performs - conversion to Unicode strings.) - - If there is a character stream specified, the SAX parser will - ignore any byte stream and will not attempt to open a URI - connection to the system identifier.""" - self.__charfile = charfile - - def getCharacterStream(self): - "Get the character stream for this input source." - return self.__charfile - -# ===== ATTRIBUTESIMPL ===== - -class AttributesImpl: - - def __init__(self, attrs): - """Non-NS-aware implementation. - - attrs should be of the form {name : value}.""" - self._attrs = attrs - - def getLength(self): - return len(self._attrs) - - def getType(self, name): - return "CDATA" - - def getValue(self, name): - return self._attrs[name] - - def getValueByQName(self, name): - return self._attrs[name] - - def getNameByQName(self, name): - if not name in self._attrs: - raise KeyError, name - return name - - def getQNameByName(self, name): - if not name in self._attrs: - raise KeyError, name - return name - - def getNames(self): - return self._attrs.keys() - - def getQNames(self): - return self._attrs.keys() - - def __len__(self): - return len(self._attrs) - - def __getitem__(self, name): - return self._attrs[name] - - def keys(self): - return self._attrs.keys() - - def has_key(self, name): - return name in self._attrs - - def __contains__(self, name): - return name in self._attrs - - def get(self, name, alternative=None): - return self._attrs.get(name, alternative) - - def copy(self): - return 
self.__class__(self._attrs) - - def items(self): - return self._attrs.items() - - def values(self): - return self._attrs.values() - -# ===== ATTRIBUTESNSIMPL ===== - -class AttributesNSImpl(AttributesImpl): - - def __init__(self, attrs, qnames): - """NS-aware implementation. - - attrs should be of the form {(ns_uri, lname): value, ...}. - qnames of the form {(ns_uri, lname): qname, ...}.""" - self._attrs = attrs - self._qnames = qnames - - def getValueByQName(self, name): - for (nsname, qname) in self._qnames.items(): - if qname == name: - return self._attrs[nsname] - - raise KeyError, name - - def getNameByQName(self, name): - for (nsname, qname) in self._qnames.items(): - if qname == name: - return nsname - - raise KeyError, name - - def getQNameByName(self, name): - return self._qnames[name] - - def getQNames(self): - return self._qnames.values() - - def copy(self): - return self.__class__(self._attrs, self._qnames) - - -def _test(): - XMLReader() - IncrementalParser() - Locator() - -if __name__ == "__main__": - _test() diff --git a/python/Lib/xmllib.py b/python/Lib/xmllib.py deleted file mode 100755 index 96ee8411e7..0000000000 --- a/python/Lib/xmllib.py +++ /dev/null @@ -1,930 +0,0 @@ -"""A parser for XML, using the derived class as static DTD.""" - -# Author: Sjoerd Mullender. - -import re -import string - -import warnings -warnings.warn("The xmllib module is obsolete. 
Use xml.sax instead.", - DeprecationWarning, 2) -del warnings - -version = '0.3' - -class Error(RuntimeError): - pass - -# Regular expressions used for parsing - -_S = '[ \t\r\n]+' # white space -_opS = '[ \t\r\n]*' # optional white space -_Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*' # valid XML name -_QStr = "(?:'[^']*'|\"[^\"]*\")" # quoted XML string -illegal = re.compile('[^\t\r\n -\176\240-\377]') # illegal chars in content -interesting = re.compile('[]&<]') - -amp = re.compile('&') -ref = re.compile('&(' + _Name + '|#[0-9]+|#x[0-9a-fA-F]+)[^-a-zA-Z0-9._:]') -entityref = re.compile('&(?P' + _Name + ')[^-a-zA-Z0-9._:]') -charref = re.compile('&#(?P[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])') -space = re.compile(_S + '$') -newline = re.compile('\n') - -attrfind = re.compile( - _S + '(?P' + _Name + ')' - '(' + _opS + '=' + _opS + - '(?P'+_QStr+'|[-a-zA-Z0-9.:+*%?!\(\)_#=~]+))?') -starttagopen = re.compile('<' + _Name) -starttagend = re.compile(_opS + '(?P/?)>') -starttagmatch = re.compile('<(?P'+_Name+')' - '(?P(?:'+attrfind.pattern+')*)'+ - starttagend.pattern) -endtagopen = re.compile('') -endbracketfind = re.compile('(?:[^>\'"]|'+_QStr+')*>') -tagfind = re.compile(_Name) -cdataopen = re.compile(r'') -# this matches one of the following: -# SYSTEM SystemLiteral -# PUBLIC PubidLiteral SystemLiteral -_SystemLiteral = '(?P<%s>'+_QStr+')' -_PublicLiteral = '(?P<%s>"[-\'\(\)+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*"|' \ - "'[-\(\)+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*')" -_ExternalId = '(?:SYSTEM|' \ - 'PUBLIC'+_S+_PublicLiteral%'pubid'+ \ - ')'+_S+_SystemLiteral%'syslit' -doctype = re.compile(''+_Name+')' - '(?:'+_S+_ExternalId+')?'+_opS) -xmldecl = re.compile('<\?xml'+_S+ - 'version'+_opS+'='+_opS+'(?P'+_QStr+')'+ - '(?:'+_S+'encoding'+_opS+'='+_opS+ - "(?P'[A-Za-z][-A-Za-z0-9._]*'|" - '"[A-Za-z][-A-Za-z0-9._]*"))?' 
- '(?:'+_S+'standalone'+_opS+'='+_opS+ - '(?P\'(?:yes|no)\'|"(?:yes|no)"))?'+ - _opS+'\?>') -procopen = re.compile(r'<\?(?P' + _Name + ')' + _opS) -procclose = re.compile(_opS + r'\?>') -commentopen = re.compile('') -doubledash = re.compile('--') -attrtrans = string.maketrans(' \r\n\t', ' ') - -# definitions for XML namespaces -_NCName = '[a-zA-Z_][-a-zA-Z0-9._]*' # XML Name, minus the ":" -ncname = re.compile(_NCName + '$') -qname = re.compile('(?:(?P' + _NCName + '):)?' # optional prefix - '(?P' + _NCName + ')$') - -xmlns = re.compile('xmlns(?::(?P'+_NCName+'))?$') - -# XML parser base class -- find tags and call handler functions. -# Usage: p = XMLParser(); p.feed(data); ...; p.close(). -# The dtd is defined by deriving a class which defines methods with -# special names to handle tags: start_foo and end_foo to handle -# and , respectively. The data between tags is passed to the -# parser by calling self.handle_data() with some data as argument (the -# data may be split up in arbitrary chunks). 
- -class XMLParser: - attributes = {} # default, to be overridden - elements = {} # default, to be overridden - - # parsing options, settable using keyword args in __init__ - __accept_unquoted_attributes = 0 - __accept_missing_endtag_name = 0 - __map_case = 0 - __accept_utf8 = 0 - __translate_attribute_references = 1 - - # Interface -- initialize and reset this instance - def __init__(self, **kw): - self.__fixed = 0 - if 'accept_unquoted_attributes' in kw: - self.__accept_unquoted_attributes = kw['accept_unquoted_attributes'] - if 'accept_missing_endtag_name' in kw: - self.__accept_missing_endtag_name = kw['accept_missing_endtag_name'] - if 'map_case' in kw: - self.__map_case = kw['map_case'] - if 'accept_utf8' in kw: - self.__accept_utf8 = kw['accept_utf8'] - if 'translate_attribute_references' in kw: - self.__translate_attribute_references = kw['translate_attribute_references'] - self.reset() - - def __fixelements(self): - self.__fixed = 1 - self.elements = {} - self.__fixdict(self.__dict__) - self.__fixclass(self.__class__) - - def __fixclass(self, kl): - self.__fixdict(kl.__dict__) - for k in kl.__bases__: - self.__fixclass(k) - - def __fixdict(self, dict): - for key in dict.keys(): - if key[:6] == 'start_': - tag = key[6:] - start, end = self.elements.get(tag, (None, None)) - if start is None: - self.elements[tag] = getattr(self, key), end - elif key[:4] == 'end_': - tag = key[4:] - start, end = self.elements.get(tag, (None, None)) - if end is None: - self.elements[tag] = start, getattr(self, key) - - # Interface -- reset this instance. 
Loses all unprocessed data - def reset(self): - self.rawdata = '' - self.stack = [] - self.nomoretags = 0 - self.literal = 0 - self.lineno = 1 - self.__at_start = 1 - self.__seen_doctype = None - self.__seen_starttag = 0 - self.__use_namespaces = 0 - self.__namespaces = {'xml':None} # xml is implicitly declared - # backward compatibility hack: if elements not overridden, - # fill it in ourselves - if self.elements is XMLParser.elements: - self.__fixelements() - - # For derived classes only -- enter literal mode (CDATA) till EOF - def setnomoretags(self): - self.nomoretags = self.literal = 1 - - # For derived classes only -- enter literal mode (CDATA) - def setliteral(self, *args): - self.literal = 1 - - # Interface -- feed some data to the parser. Call this as - # often as you want, with as little or as much text as you - # want (may include '\n'). (This just saves the text, all the - # processing is done by goahead().) - def feed(self, data): - self.rawdata = self.rawdata + data - self.goahead(0) - - # Interface -- handle the remaining data - def close(self): - self.goahead(1) - if self.__fixed: - self.__fixed = 0 - # remove self.elements so that we don't leak - del self.elements - - # Interface -- translate references - def translate_references(self, data, all = 1): - if not self.__translate_attribute_references: - return data - i = 0 - while 1: - res = amp.search(data, i) - if res is None: - return data - s = res.start(0) - res = ref.match(data, s) - if res is None: - self.syntax_error("bogus `&'") - i = s+1 - continue - i = res.end(0) - str = res.group(1) - rescan = 0 - if str[0] == '#': - if str[1] == 'x': - str = chr(int(str[2:], 16)) - else: - str = chr(int(str[1:])) - if data[i - 1] != ';': - self.syntax_error("`;' missing after char reference") - i = i-1 - elif all: - if str in self.entitydefs: - str = self.entitydefs[str] - rescan = 1 - elif data[i - 1] != ';': - self.syntax_error("bogus `&'") - i = s + 1 # just past the & - continue - else: - 
self.syntax_error("reference to unknown entity `&%s;'" % str) - str = '&' + str + ';' - elif data[i - 1] != ';': - self.syntax_error("bogus `&'") - i = s + 1 # just past the & - continue - - # when we get here, str contains the translated text and i points - # to the end of the string that is to be replaced - data = data[:s] + str + data[i:] - if rescan: - i = s - else: - i = s + len(str) - - # Interface - return a dictionary of all namespaces currently valid - def getnamespace(self): - nsdict = {} - for t, d, nst in self.stack: - nsdict.update(d) - return nsdict - - # Internal -- handle data as far as reasonable. May leave state - # and data to be processed by a subsequent call. If 'end' is - # true, force handling all data as if followed by EOF marker. - def goahead(self, end): - rawdata = self.rawdata - i = 0 - n = len(rawdata) - while i < n: - if i > 0: - self.__at_start = 0 - if self.nomoretags: - data = rawdata[i:n] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = n - break - res = interesting.search(rawdata, i) - if res: - j = res.start(0) - else: - j = n - if i < j: - data = rawdata[i:j] - if self.__at_start and space.match(data) is None: - self.syntax_error('illegal data at start of file') - self.__at_start = 0 - if not self.stack and space.match(data) is None: - self.syntax_error('data not in content') - if not self.__accept_utf8 and illegal.search(data): - self.syntax_error('illegal character in content') - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = j - if i == n: break - if rawdata[i] == '<': - if starttagopen.match(rawdata, i): - if self.literal: - data = rawdata[i] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = i+1 - continue - k = self.parse_starttag(i) - if k < 0: break - self.__seen_starttag = 1 - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - if endtagopen.match(rawdata, i): - k = self.parse_endtag(i) - if k < 0: break - 
self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - if commentopen.match(rawdata, i): - if self.literal: - data = rawdata[i] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = i+1 - continue - k = self.parse_comment(i) - if k < 0: break - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - if cdataopen.match(rawdata, i): - k = self.parse_cdata(i) - if k < 0: break - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - res = xmldecl.match(rawdata, i) - if res: - if not self.__at_start: - self.syntax_error(" declaration not at start of document") - version, encoding, standalone = res.group('version', - 'encoding', - 'standalone') - if version[1:-1] != '1.0': - raise Error('only XML version 1.0 supported') - if encoding: encoding = encoding[1:-1] - if standalone: standalone = standalone[1:-1] - self.handle_xml(encoding, standalone) - i = res.end(0) - continue - res = procopen.match(rawdata, i) - if res: - k = self.parse_proc(i) - if k < 0: break - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - res = doctype.match(rawdata, i) - if res: - if self.literal: - data = rawdata[i] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = i+1 - continue - if self.__seen_doctype: - self.syntax_error('multiple DOCTYPE elements') - if self.__seen_starttag: - self.syntax_error('DOCTYPE not at beginning of document') - k = self.parse_doctype(res) - if k < 0: break - self.__seen_doctype = res.group('name') - if self.__map_case: - self.__seen_doctype = self.__seen_doctype.lower() - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - elif rawdata[i] == '&': - if self.literal: - data = rawdata[i] - self.handle_data(data) - i = i+1 - continue - res = charref.match(rawdata, i) - if res is not None: - i = res.end(0) - if rawdata[i-1] != ';': - self.syntax_error("`;' missing in charref") - i = i-1 - if not self.stack: - 
self.syntax_error('data not in content') - self.handle_charref(res.group('char')[:-1]) - self.lineno = self.lineno + res.group(0).count('\n') - continue - res = entityref.match(rawdata, i) - if res is not None: - i = res.end(0) - if rawdata[i-1] != ';': - self.syntax_error("`;' missing in entityref") - i = i-1 - name = res.group('name') - if self.__map_case: - name = name.lower() - if name in self.entitydefs: - self.rawdata = rawdata = rawdata[:res.start(0)] + self.entitydefs[name] + rawdata[i:] - n = len(rawdata) - i = res.start(0) - else: - self.unknown_entityref(name) - self.lineno = self.lineno + res.group(0).count('\n') - continue - elif rawdata[i] == ']': - if self.literal: - data = rawdata[i] - self.handle_data(data) - i = i+1 - continue - if n-i < 3: - break - if cdataclose.match(rawdata, i): - self.syntax_error("bogus `]]>'") - self.handle_data(rawdata[i]) - i = i+1 - continue - else: - raise Error('neither < nor & ??') - # We get here only if incomplete matches but - # nothing else - break - # end while - if i > 0: - self.__at_start = 0 - if end and i < n: - data = rawdata[i] - self.syntax_error("bogus `%s'" % data) - if not self.__accept_utf8 and illegal.search(data): - self.syntax_error('illegal character in content') - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - self.rawdata = rawdata[i+1:] - return self.goahead(end) - self.rawdata = rawdata[i:] - if end: - if not self.__seen_starttag: - self.syntax_error('no elements in file') - if self.stack: - self.syntax_error('missing end tags') - while self.stack: - self.finish_endtag(self.stack[-1][0]) - - # Internal -- parse comment, return length or -1 if not terminated - def parse_comment(self, i): - rawdata = self.rawdata - if rawdata[i:i+4] != ' (Extraneous whitespace in declaration) - - You can pass in a custom list of (RE object, replace method) - tuples to get Beautiful Soup to scrub your input the way you - want.""" - Tag.__init__(self, self.ROOT_TAG_NAME) - if 
avoidParserProblems \ - and not isList(avoidParserProblems): - avoidParserProblems = self.PARSER_MASSAGE - self.avoidParserProblems = avoidParserProblems - SGMLParser.__init__(self) - self.quoteStack = [] - self.hidden = 1 - self.reset() - if hasattr(text, 'read'): - #It's a file-type object. - text = text.read() - if text: - self.feed(text) - if initialTextIsEverything: - self.done() - - def __getattr__(self, methodName): - """This method routes method call requests to either the SGMLParser - superclass or the Tag superclass, depending on the method name.""" - if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ - or methodName.find('do_') == 0: - return SGMLParser.__getattr__(self, methodName) - elif methodName.find('__') != 0: - return Tag.__getattr__(self, methodName) - else: - raise AttributeError - - def feed(self, text): - if self.avoidParserProblems: - for fix, m in self.avoidParserProblems: - text = fix.sub(m, text) - SGMLParser.feed(self, text) - - def done(self): - """Called when you're done parsing, so that the unclosed tags can be - correctly processed.""" - self.endData() #NEW - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def reset(self): - SGMLParser.reset(self) - self.currentData = [] - self.currentTag = None - self.tagStack = [] - self.pushTag(self) - - def popTag(self): - tag = self.tagStack.pop() - # Tags with just one string-owning child get the child as a - # 'string' property, so that soup.tag.string is shorthand for - # soup.tag.contents[0] - if len(self.currentTag.contents) == 1 and \ - isinstance(self.currentTag.contents[0], NavigableText): - self.currentTag.string = self.currentTag.contents[0] - - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - - def endData(self): - 
currentData = ''.join(self.currentData) - if currentData: - if not currentData.strip(): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - c = NavigableString - if type(currentData) == types.UnicodeType: - c = NavigableUnicodeString - o = c(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) - self.currentData = [] - - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - if name == self.ROOT_TAG_NAME: - return - - numPops = 0 - mostRecentTag = None - for i in range(len(self.tagStack)-1, 0, -1): - if name == self.tagStack[i].name: - numPops = len(self.tagStack)-i - break - if not inclusivePop: - numPops = numPops - 1 - - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag - - def _smartPop(self, name): - - """We need to pop up to the previous tag of this type, unless - one of this tag's nesting reset triggers comes between this - tag and the previous tag of this type, OR unless this tag is a - generic nesting trigger and another generic nesting trigger - comes between this tag and the previous tag of this type. - - Examples: -

FooBar

should pop to 'p', not 'b'. -

FooBar

should pop to 'table', not 'p'. -

Foo

Bar

should pop to 'tr', not 'p'. -

FooBar

should pop to 'p', not 'b'. - -

    • *
    • * should pop to 'ul', not the first 'li'. -
  • ** should pop to 'table', not the first 'tr' - tag should - implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): - p = self.tagStack[i] - if (not p or p.name == name) and not isNestable: - #Non-nestable tags get popped to the top or to their - #last occurance. - popTo = name - break - if (nestingResetTriggers != None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag - #that causes nesting to reset, pop up to but not - #including that tag. - - popTo = p.name - inclusive = False - break - p = p.parent - if popTo: - self._popToTag(popTo, inclusive) - - def unknown_starttag(self, name, attrs, selfClosing=0): - #print "Start tag %s" % name - if self.quoteStack: - #This is not a real tag. - #print "<%s> is not real!" % name - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() - if not name in self.SELF_CLOSING_TAGS and not selfClosing: - self._smartPop(name) - tag = Tag(name, attrs, self.currentTag, self.previous) - if self.previous: - self.previous.next = tag - self.previous = tag - self.pushTag(tag) - if selfClosing or name in self.SELF_CLOSING_TAGS: - self.popTag() - if name in self.QUOTE_TAGS: - #print "Beginning quote (%s)" % name - self.quoteStack.append(name) - self.literal = 1 - - def unknown_endtag(self, name): - if self.quoteStack and self.quoteStack[-1] != name: - #This is not a real end tag. - #print " is not real!" 
% name - self.handle_data('' % name) - return - self.endData() - self._popToTag(name) - if self.quoteStack and self.quoteStack[-1] == name: - self.quoteStack.pop() - self.literal = (len(self.quoteStack) > 0) - - def handle_data(self, data): - self.currentData.append(data) - - def handle_pi(self, text): - "Propagate processing instructions right through." - self.handle_data("" % text) - - def handle_comment(self, text): - "Propagate comments right through." - self.handle_data("" % text) - - def handle_charref(self, ref): - "Propagate char refs right through." - self.handle_data('&#%s;' % ref) - - def handle_entityref(self, ref): - "Propagate entity refs right through." - self.handle_data('&%s;' % ref) - - def handle_decl(self, data): - "Propagate DOCTYPEs and the like right through." - self.handle_data('' % data) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as regular data.""" - j = None - if self.rawdata[i:i+9] == '', i) - if k == -1: - k = len(self.rawdata) - self.handle_data(self.rawdata[i+9:k]) - j = k+3 - else: - try: - j = SGMLParser.parse_declaration(self, i) - except SGMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - -class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a

    tag should implicitly close the previous

    tag. - -

    Para1

    Para2 - should be transformed into: -

    Para1

    Para2 - - Some tags can be nested arbitrarily. For instance, the occurance - of a

    tag should _not_ implicitly close the previous -
    tag. - - Alice said:
    Bob said:
    Blah - should NOT be transformed into: - Alice said:
    Bob said:
    Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a
    , - but not close a tag in another table. - -
    BlahBlah - should be transformed into: -
    BlahBlah - but, - Blah
    Blah - should NOT be transformed into - Blah
    Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup before writing your own - subclass.""" - - SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - QUOTE_TAGS = {'script': None} - - #According to the HTML standard, each of these inline tags can - #contain another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center'] - - #According to the HTML standard, these block tags can contain - #another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] - - #Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = { 'ol' : [], - 'ul' : [], - 'li' : ['ul', 'ol'], - 'dl' : [], - 'dd' : ['dl'], - 'dt' : ['dl'] } - - #Tables can contain other tables, but there are restrictions. - NESTABLE_TABLE_TAGS = {'table' : [], - 'tr' : ['table', 'tbody', 'tfoot', 'thead'], - 'td' : ['tr'], - 'th' : ['tr'], - } - - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] - - #If one of these tags is encountered, all tags up to the next tag of - #this type are popped. - RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', - NON_NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, - NESTABLE_TABLE_TAGS) - - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) - -class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over - common HTML errors like unclosed tags. However, sometimes it makes - errors of its own. 
For instance, consider this fragment: - - FooBar - - This is perfectly valid (if bizarre) HTML. However, the - BeautifulSoup class will implicitly close the first b tag when it - encounters the second 'b'. It will think the author wrote - "FooBar", and didn't close the first 'b' tag, because - there's no real-world reason to bold something that's already - bold. When it encounters '' it will close two more 'b' - tags, for a grand total of three tags closed instead of two. This - can throw off the rest of your document structure. The same is - true of a number of other tags, listed below. - - It's much more common for someone to forget to close (eg.) a 'b' - tag than to actually use nested 'b' tags, and the BeautifulSoup - class handles the common case. This class handles the - not-co-common case: where you can't believe someone wrote what - they did, but it's valid HTML and BeautifulSoup screwed up by - assuming it wouldn't be. - - If this doesn't do what you need, try subclassing this class or - BeautifulSoup, and providing your own list of NESTABLE_TAGS.""" - - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big'] - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] - - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - -class BeautifulSOAP(BeautifulStoneSoup): - """This class will push a tag with only a single string child into - the tag's parent as an attribute. The attribute's name is the tag - name, and the value is the string child. An example should give - the flavor of the change: - - baz - => - baz - - You can then access fooTag['bar'] instead of fooTag.barTag.string. - - This is, of course, useful for scraping structures that tend to - use subelements instead of attributes, such as SOAP messages. 
Note - that it modifies its input, so don't print the modified version - out. - - I'm not sure how many people really want to use this class; let me - know if you do. Mainly I like the name.""" - - def popTag(self): - if len(self.tagStack) > 1: - tag = self.tagStack[-1] - parent = self.tagStack[-2] - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableText) and - not parent.attrMap.has_key(tag.name)): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -#Enterprise class names! It has come to our attention that some people -#think the names of the Beautiful Soup parser classes are too silly -#and "unprofessional" for use in enterprise screen-scraping. We feel -#your pain! For such-minded folk, the Beautiful Soup Consortium And -#All-Night Kosher Bakery recommends renaming this file to -#"RobustParser.py" (or, in cases of extreme enterprisitude, -#"RobustParserBeanInterface.class") and using the following -#enterprise-friendly class aliases: -class RobustXMLParser(BeautifulStoneSoup): - pass -class RobustHTMLParser(BeautifulSoup): - pass -class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): - pass -class SimplifyingSOAPParser(BeautifulSOAP): - pass - -### - - -#By default, act as an HTML pretty-printer. -if __name__ == '__main__': - import sys - soup = BeautifulStoneSoup(sys.stdin.read()) - print soup.prettify() diff --git a/samples-and-tests/i-am-a-developer/mechanize/_clientcookie.py b/samples-and-tests/i-am-a-developer/mechanize/_clientcookie.py deleted file mode 100644 index caeb82bfc0..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_clientcookie.py +++ /dev/null @@ -1,1707 +0,0 @@ -"""HTTP cookie handling for web clients. - -This module originally developed from my port of Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. 
- -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -Comments to John J Lee . - - -Copyright 2002-2006 John J Lee -Copyright 1997-1999 Gisle Aas (original libwww-perl code) -Copyright 2002-2003 Johnny Lee (original MSIE Perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import sys, re, copy, time, urllib, types, logging -try: - import threading - _threading = threading; del threading -except ImportError: - import dummy_threading - _threading = dummy_threading; del dummy_threading - -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") -DEFAULT_HTTP_PORT = "80" - -from _headersutil import split_header_words, parse_ns_headers -from _util import isstringlike -import _rfc3986 - -debug = logging.getLogger("mechanize.cookies").debug - - -def reraise_unmasked_exceptions(unmasked=()): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. - # This function re-raises some exceptions we don't want to trap. 
- import mechanize, warnings - if not mechanize.USE_BARE_EXCEPT: - raise - unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError) - etype = sys.exc_info()[0] - if issubclass(etype, unmasked): - raise - # swallowed an exception - import traceback, StringIO - f = StringIO.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2) - - -IPV4_RE = re.compile(r"\.\d+$") -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - return not (IPV4_RE.search(text) or - text == "" or - text[0] == "." or text[-1] == ".") - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) - - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. 
- A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - has_form_nb = not (i == -1 or i == 0) - return ( - has_form_nb and - B.startswith(".") and - is_HDN(B[1:]) - ) - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - return not IPV4_RE.search(text) - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$") -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. 
- - """ - url = request.get_full_url() - host = _rfc3986.urlsplit(url)[1] - if host is None: - host = request.get_header("Host", "") - # remove port, if present - return cut_port_re.sub("", host, 1) - -def request_host_lc(request): - return request_host(request).lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name).""" - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def eff_request_host_lc(request): - req_host, erhn = eff_request_host(request) - return req_host.lower(), erhn.lower() - -def effective_request_host(request): - """Return the effective request-host, as defined by RFC 2965.""" - return eff_request_host(request)[1] - -def request_path(request): - """request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - path, query, frag = _rfc3986.urlsplit(url)[2:] - path = escape_path(path) - req_path = _rfc3986.urlunsplit((None, None, path, query, frag)) - if not req_path.startswith("/"): - req_path = "/"+req_path - return req_path - -def request_port(request): - host = request.get_host() - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -def request_is_unverifiable(request): - try: - return request.is_unverifiable() - except AttributeError: - if hasattr(request, "unverifiable"): - return request.unverifiable - else: - raise - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). 
-HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - if isinstance(path, types.UnicodeType): - path = path.encode("utf-8") - path = urllib.quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. - - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. 
- - """ - req_host = request_host_lc(request) - # the origin request's request-host was stuffed into request by - # _urllib2_support.AbstractHTTPHandler - return not domain_match(req_host, reach(request.origin_req_host)) - - -class Cookie: - """HTTP Cookie. - - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - version: integer; - name: string; - value: string (may be None); - port: string; None indicates no attribute was supplied (eg. "Port", rather - than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list - string (eg. "80,8080") - port_specified: boolean; true if a value was supplied with the Port - cookie-attribute - domain: string; - domain_specified: boolean; true if Domain was explicitly set - domain_initial_dot: boolean; true if Domain as set in HTTP header by server - started with a dot (yes, this really is necessary!) 
- path: string; - path_specified: boolean; true if Path was explicitly set - secure: boolean; true if should only be returned over secure connection - expires: integer; seconds since epoch (RFC 2965 cookies should calculate - this value from the Max-Age attribute) - discard: boolean, true if this is a session cookie; (if no expires value, - this should be true) - comment: string; - comment_url: string; - rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not - Set-Cookie2:) header, but had a version cookie-attribute of 1 - rest: mapping of other cookie-attributes - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. - - """ - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(expires) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. 
- self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return self._rest.has_key(name) - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - def nonstandard_attr_keys(self): - return self._rest.keys() - - def is_expired(self, now=None): - if now is None: now = time.time() - return (self.expires is not None) and (self.expires <= now) - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "" % (namevalue, limit) - - def __repr__(self): - args = [] - for name in ["version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ]: - attr = getattr(self, name) - args.append("%s=%s" % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "Cookie(%s)" % ", ".join(args) - - -class CookiePolicy: - """Defines which cookies get accepted from and returned to server. - - May also modify cookies. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customised policy. - - As well as implementing set_ok and return_ok, implementations of this - interface must also supply the following attributes, indicating which - protocols should be used, and how. 
These can be read and set at any time, - though whether that makes complete sense from the protocol point of view is - doubtful. - - Public attributes: - - netscape: implement netscape protocol - rfc2965: implement RFC 2965 protocol - rfc2109_as_netscape: - WARNING: This argument will change or go away if is not accepted into - the Python standard library in this form! - If true, treat RFC 2109 cookies as though they were Netscape cookies. The - default is for this attribute to be None, which means treat 2109 cookies - as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is, - by default), and as Netscape cookies otherwise. - hide_cookie2: don't add Cookie2 header to requests (the presence of - this header indicates to the server that we understand RFC 2965 - cookies) - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. - - cookie: mechanize.Cookie object - request: object implementing the interface defined by - CookieJar.extract_cookies.__doc__ - - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server. - - cookie: mechanize.Cookie object - request: object implementing the interface defined by - CookieJar.add_cookie_header.__doc__ - - """ - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - - This is here as an optimization, to remove the need for checking every - cookie with a particular domain (which may involve reading many files). - The default implementations of domain_return_ok and path_return_ok - (return True) leave all the work to return_ok. - - If domain_return_ok returns true for the cookie domain, path_return_ok - is called for the cookie path. 
Otherwise, path_return_ok and return_ok - are never called for that cookie domain. If path_return_ok returns - true, return_ok is called with the Cookie object itself for a full - check. Otherwise, return_ok is never called for that cookie path. - - Note that domain_return_ok is called for every *cookie* domain, not - just for the *request* domain. For example, the function might be - called with both ".acme.com" and "www.acme.com" if the request domain - is "www.acme.com". The same goes for path_return_ok. - - For argument documentation, see the docstring for return_ok. - - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. - - See the docstring for domain_return_ok. - - """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies. - - Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is - switched off by default. - - The easiest way to provide your own policy is to override this class and - call its methods in your overriden implementations before adding your own - additional checks. - - import mechanize - class MyCookiePolicy(mechanize.DefaultCookiePolicy): - def set_ok(self, cookie, request): - if not mechanize.DefaultCookiePolicy.set_ok( - self, cookie, request): - return False - if i_dont_want_to_store_this_cookie(): - return False - return True - - In addition to the features required to implement the CookiePolicy - interface, this class allows you to block and allow domains from setting - and receiving cookies. There are also some strictness switches that allow - you to tighten up the rather loose Netscape protocol rules a little bit (at - the cost of blocking some benign cookies). - - A domain blacklist and whitelist is provided (both off by default). 
Only - domains not in the blacklist and present in the whitelist (if the whitelist - is active) participate in cookie setting and returning. Use the - blocked_domains constructor argument, and blocked_domains and - set_blocked_domains methods (and the corresponding argument and methods for - allowed_domains). If you set a whitelist, you can turn it off again by - setting it to None. - - Domains in block or allow lists that do not start with a dot must - string-compare equal. For example, "acme.com" matches a blacklist entry of - "acme.com", but "www.acme.com" does not. Domains that do start with a dot - are matched by more specific domains too. For example, both "www.acme.com" - and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does - not). IP addresses are an exception, and must match exactly. For example, - if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is - blocked, but 193.168.1.2 is not. - - Additional Public Attributes: - - General strictness switches - - strict_domain: don't allow sites to set two-component domains with - country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc. - This is far from perfect and isn't guaranteed to work! - - RFC 2965 protocol strictness switches - - strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable - transactions (usually, an unverifiable transaction is one resulting from - a redirect or an image hosted on another site); if this is false, cookies - are NEVER blocked on the basis of verifiability - - Netscape protocol strictness switches - - strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions - even to Netscape cookies - strict_ns_domain: flags indicating how strict to be with domain-matching - rules for Netscape cookies: - DomainStrictNoDots: when setting cookies, host prefix must not contain a - dot (eg. 
www.foo.bar.com can't set a cookie for .bar.com, because - www.foo contains a dot) - DomainStrictNonDomain: cookies that did not explicitly specify a Domain - cookie-attribute can only be returned to a domain that string-compares - equal to the domain that set the cookie (eg. rockets.acme.com won't - be returned cookies from acme.com that had no Domain cookie-attribute) - DomainRFC2965Match: when setting cookies, require a full RFC 2965 - domain-match - DomainLiberal and DomainStrict are the most useful combinations of the - above flags, for convenience - strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that - have names starting with '$' - strict_ns_set_path: don't allow setting cookies whose path doesn't - path-match request URI - - """ - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - # WARNING: this argument will change or go away if is not - # accepted into the Python standard library in this form! - # default, ie. treat 2109 as netscape iff not rfc2965 - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """ - Constructor arguments should be used as keyword arguments only. - - blocked_domains: sequence of domain names that we never accept cookies - from, nor return cookies to - allowed_domains: if not None, this is a sequence of the only domains - for which we accept and return cookies - - For other arguments, see CookiePolicy.__doc__ and - DefaultCookiePolicy.__doc__.. 
- - """ - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override set_ok, be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). 
- - """ - debug(" - checking cookie %s", cookie) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. - debug(" Set-Cookie2 without version attribute (%s)", cookie) - return False - if cookie.version > 0 and not self.rfc2965: - debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request_is_unverifiable(request) and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. 
- if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_countrycode_domain(self, cookie, request): - """Return False if explicit cookie domain is not acceptable. - - Called by set_ok_domain, for convenience of overriding by - subclasses. - - """ - if cookie.domain_specified and self.strict_domain: - domain = cookie.domain - # since domain was specified, we know that: - assert domain.startswith(".") - if domain.count(".") == 2: - # domain like .foo.bar - i = domain.rfind(".") - tld = domain[i+1:] - sld = domain[1:i] - if (sld.lower() in [ - "co", "ac", - "com", "edu", "org", "net", "gov", "mil", "int", - "aero", "biz", "cat", "coop", "info", "jobs", "mobi", - "museum", "name", "pro", "travel", - ] and - len(tld) == 2): - # domain like .co.uk - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - debug(" domain %s is not in user allow-list", cookie.domain) - return False - if not self.set_ok_countrycode_domain(cookie, request): - debug(" country-code second level domain %s", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host_lc(request) - domain = cookie.domain - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - 
debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - debug(" effective request-host %s (even with added " - "initial dot) does not end end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def return_ok(self, cookie, request): - """ - If you override return_ok, be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to return). - - """ - # Path has already been checked by path_return_ok, and domain blocking - # done by domain_return_ok. 
- debug(" - checking cookie %s", cookie) - - for n in ("version", "verifiability", "secure", "expires", "port", - "domain"): - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request_is_unverifiable(request) and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.get_type() != "https": - debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def return_ok_domain(self, cookie, request): - req_host, erhn = eff_request_host_lc(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - debug(" cookie with unspecified domain does not 
string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of domain. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - - # Munge req_host and erhn to always start with a dot, so as to err on - # the side of letting cookies through. - dotted_req_host, dotted_erhn = eff_request_host_lc(request) - if not dotted_req_host.startswith("."): - dotted_req_host = "."+dotted_req_host - if not dotted_erhn.startswith("."): - dotted_erhn = "."+dotted_erhn - if not (dotted_req_host.endswith(domain) or - dotted_erhn.endswith(domain)): - #debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = adict.keys() - keys.sort() - return map(adict.get, keys) - -class MappingIterator: - """Iterates over nested mapping, depth-first, in sorted order by key.""" - def __init__(self, mapping): - self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack - - def __iter__(self): return self - - def next(self): - # this is hairy because of lack of generators - while 1: - 
try: - vals, i, prev_item = self._s.pop() - except IndexError: - raise StopIteration() - if i < len(vals): - item = vals[i] - i = i + 1 - self._s.append((vals, i, prev_item)) - try: - item.items - except AttributeError: - # non-mapping - break - else: - # mapping - self._s.append((vals_sorted_by_key(item), 0, item)) - continue - return item - - -# Used as second parameter to dict.get method, to distinguish absent -# dict key from one with a None value. -class Absent: pass - -class CookieJar: - """Collection of HTTP cookies. - - You may not need to know about this class: try mechanize.urlopen(). - - The major methods are extract_cookies and add_cookie_header; these are all - you are likely to need. - - CookieJar supports the iterator protocol: - - for cookie in cookiejar: - # do something with cookie - - Methods: - - add_cookie_header(request) - extract_cookies(response, request) - get_policy() - set_policy(policy) - cookies_for_request(request) - make_cookies(response, request) - set_cookie_if_ok(cookie, request) - set_cookie(cookie) - clear_session_cookies() - clear_expired_cookies() - clear(domain=None, path=None, name=None) - - Public attributes - - policy: CookiePolicy object - - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - def __init__(self, policy=None): - """ - See CookieJar.__doc__ for argument documentation. 
- - """ - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies = {} - - # for __getitem__ iteration in pre-2.2 Pythons - self._prev_getitem_index = 0 - - def get_policy(self): - return self._policy - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - debug(" not returning cookie") - continue - debug(" it's a match") - cookies.append(cookie) - return cookies - - def cookies_for_request(self, request): - """Return a list of cookies to be returned to server. - - The returned list of cookie instances is sorted in the order they - should appear in the Cookie: header for return to the server. - - See add_cookie_header.__doc__ for the interface required of the - request argument. - - New in version 0.1.10 - - """ - self._policy._now = self._now = int(time.time()) - cookies = self._cookies_for_request(request) - # add cookies in order of most specific (i.e. 
longest) path first - def decreasing_size(a, b): return cmp(len(b.path), len(a.path)) - cookies.sort(decreasing_size) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - # this method still exists (alongside cookies_for_request) because it - # is part of an implied protected interface for subclasses of cookiejar - # XXX document that implied interface, or provide another way of - # implementing cookiejars than subclassing - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - The $Version attribute is also added when appropriate (currently only - once per request). - - >>> jar = CookieJar() - >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False, - ... "example.com", False, False, - ... "/", False, False, None, True, - ... None, None, {}) - >>> jar._cookie_attrs([ns_cookie]) - ['foo="bar"'] - >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False, - ... ".example.com", True, False, - ... "/", False, False, None, True, - ... None, None, {}) - >>> jar._cookie_attrs([rfc2965_cookie]) - ['$Version=1', 'foo=bar', '$Domain="example.com"'] - - """ - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... 
- version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib2.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. - - The request object (usually a urllib2.Request instance) must support - the methods get_full_url, get_host, is_unverifiable, get_type, - has_header, get_header, header_items and add_unredirected_header, as - documented by urllib2, and the port attribute (the port number). - Actually, RequestUpgradeProcessor will automatically upgrade your - Request object to one with has_header, get_header, header_items and - add_unredirected_header, if it lacks those methods, for compatibility - with pre-2.4 versions of urllib2. 
- - """ - debug("add_cookie_header") - cookies = self.cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header("Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if self._policy.rfc2965 and not self._policy.hide_cookie2: - for cookie in cookies: - if cookie.version != 1 and not request.has_header("Cookie2"): - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). 
- max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if standard.has_key(k): - # only first value is significant - continue - if k == "domain": - if v is None: - debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - if v is None: - debug(" missing value for max-age attribute") - bad_cookie = True - break - try: - v = int(v) - except ValueError: - debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age is a - # is a request to discard (old and new) cookie, though. 
- k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ["port", "comment", "commenturl"]): - debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: - try: - version = int(version) - except ValueError: - return None # invalid version, ignore cookie - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host_lc(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but 
has no value: default to request port. - # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - if self._policy.rfc2109_as_netscape is None: - rfc2109_as_netscape = not self._policy.rfc2965 - else: - rfc2109_as_netscape = self._policy.rfc2109_as_netscape - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_netscape: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def _make_cookies(self, response, request): - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.getheaders("Set-Cookie2") - ns_hdrs = headers.getheaders("Set-Cookie") - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and not rfc2965) or - (not rfc2965_hdrs and not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except: - reraise_unmasked_exceptions() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - 
ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except: - reraise_unmasked_exceptions() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return not lookup.has_key(key) - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object. - - See extract_cookies.__doc__ for the interface required of the - response and request arguments. - - """ - self._policy._now = self._now = int(time.time()) - return [cookie for cookie in self._make_cookies(response, request) - if cookie.expires is None or not cookie.expires <= self._now] - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so. - - cookie: mechanize.Cookie instance - request: see extract_cookies.__doc__ for the required interface - - """ - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set. 
- - cookie: mechanize.Cookie instance - """ - c = self._cookies - if not c.has_key(cookie.domain): c[cookie.domain] = {} - c2 = c[cookie.domain] - if not c2.has_key(cookie.path): c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - - def extract_cookies(self, response, request): - """Extract cookies from response, where allowable given the request. - - Look for allowable Set-Cookie: and Set-Cookie2: headers in the response - object passed as argument. Any of these headers that are found are - used to update the state of the object (subject to the policy.set_ok - method's approval). - - The response object (usually be the result of a call to - mechanize.urlopen, or similar) should support an info method, which - returns a mimetools.Message object (in fact, the 'mimetools.Message - object' may be any object that provides a getheaders method). - - The request object (usually a urllib2.Request instance) must support - the methods get_full_url, get_type, get_host, and is_unverifiable, as - documented by urllib2, and the port attribute (the port number). The - request is used to set default values for cookie-attributes as well as - for checking that the cookie is OK to be set. - - """ - debug("extract_cookies: %s", response.info()) - self._policy._now = self._now = int(time.time()) - - for cookie in self._make_cookies(response, request): - if cookie.expires is not None and cookie.expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(cookie.domain, cookie.path, cookie.name) - except KeyError: - pass - debug("Expiring cookie, domain='%s', path='%s', name='%s'", - cookie.domain, cookie.path, cookie.name) - elif self._policy.set_ok(cookie, request): - debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. 
- - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Discards all cookies held by object which had either no Max-Age or - Expires cookie-attribute or an explicit Discard cookie-attribute, or - which otherwise have ended up with a true discard attribute. For - interactive browsers, the end of a session usually corresponds to - closing the browser window. - - Note that the save method won't save session cookies anyway, unless you - ask otherwise by passing a true ignore_discard argument. - - """ - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - - def clear_expired_cookies(self): - """Discard all expired cookies. - - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the save - method won't save expired cookies anyway (unless you ask otherwise by - passing a true ignore_expires argument). 
- - """ - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - - def __getitem__(self, i): - if i == 0: - self._getitem_iterator = self.__iter__() - elif self._prev_getitem_index != i-1: raise IndexError( - "CookieJar.__getitem__ only supports sequential iteration") - self._prev_getitem_index = i - try: - return self._getitem_iterator.next() - except StopIteration: - raise IndexError() - - def __iter__(self): - return MappingIterator(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - -class LoadError(Exception): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file. - - Additional methods - - save(filename=None, ignore_discard=False, ignore_expires=False) - load(filename=None, ignore_discard=False, ignore_expires=False) - revert(filename=None, ignore_discard=False, ignore_expires=False) - - Additional public attributes - - filename: filename for loading and saving cookies - - Additional public readable attributes - - delayload: request that cookies are lazily loaded from disk; this is only - a hint since this only affects performance, not behaviour (unless the - cookies on disk are changing); a CookieJar object may ignore it (in fact, - only MSIECookieJar lazily loads cookies at the moment) - - """ - - def __init__(self, filename=None, delayload=False, policy=None): - """ - See FileCookieJar.__doc__ for argument documentation. - - Cookies are NOT loaded from the named file until either the load or - revert method is called. 
- - """ - CookieJar.__init__(self, policy) - if filename is not None and not isstringlike(filename): - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file. - - filename: name of file in which to save cookies - ignore_discard: save even cookies set to be discarded - ignore_expires: save even cookies that have expired - - The file is overwritten if it already exists, thus wiping all its - cookies. Saved cookies can be restored later using the load or revert - methods. If filename is not specified, self.filename is used; if - self.filename is None, ValueError is raised. - - """ - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file. - - Old cookies are kept unless overwritten by newly loaded ones. - - Arguments are as for .save(). - - If filename is not specified, self.filename is used; if self.filename - is None, ValueError is raised. The named file must be in the format - understood by the class, or LoadError will be raised. This format will - be identical to that written by the save method, unless the load format - is not sufficiently well understood (as is the case for MSIECookieJar). - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename) - try: - self._really_load(f, filename, ignore_discard, ignore_expires) - finally: - f.close() - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or IOError) if reversion is not successful; the - object's state will not be altered if this happens. 
- - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except (LoadError, IOError): - self._cookies = old_state - raise diff --git a/samples-and-tests/i-am-a-developer/mechanize/_debug.py b/samples-and-tests/i-am-a-developer/mechanize/_debug.py deleted file mode 100644 index 596b11477e..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_debug.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging - -from urllib2 import BaseHandler -from _response import response_seek_wrapper - - -class HTTPResponseDebugProcessor(BaseHandler): - handler_order = 900 # before redirections, after everything else - - def http_response(self, request, response): - if not hasattr(response, "seek"): - response = response_seek_wrapper(response) - info = logging.getLogger("mechanize.http_responses").info - try: - info(response.read()) - finally: - response.seek(0) - info("*****************************************************") - return response - - https_response = http_response - -class HTTPRedirectDebugProcessor(BaseHandler): - def http_request(self, request): - if hasattr(request, "redirect_dict"): - info = logging.getLogger("mechanize.http_redirects").info - info("redirecting to %s", request.get_full_url()) - return request diff --git a/samples-and-tests/i-am-a-developer/mechanize/_file.py b/samples-and-tests/i-am-a-developer/mechanize/_file.py deleted file mode 100644 index db662a8ff0..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_file.py +++ /dev/null @@ -1,60 +0,0 @@ -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO -import mimetools -import os -import socket -import urllib -from urllib2 import BaseHandler, URLError - - -class FileHandler(BaseHandler): - # Use local file or FTP depending on form of URL - def 
file_open(self, req): - url = req.get_selector() - if url[:2] == '//' and url[2:3] != '/': - req.type = 'ftp' - return self.parent.open(req) - else: - return self.open_local_file(req) - - # names for the localhost - names = None - def get_names(self): - if FileHandler.names is None: - try: - FileHandler.names = (socket.gethostbyname('localhost'), - socket.gethostbyname(socket.gethostname())) - except socket.gaierror: - FileHandler.names = (socket.gethostbyname('localhost'),) - return FileHandler.names - - # not entirely sure what the rules are here - def open_local_file(self, req): - try: - import email.utils as emailutils - except ImportError: - import email.Utils as emailutils - import mimetypes - host = req.get_host() - file = req.get_selector() - localfile = urllib.url2pathname(file) - try: - stats = os.stat(localfile) - size = stats.st_size - modified = emailutils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(file)[0] - headers = mimetools.Message(StringIO( - 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - if host: - host, port = urllib.splitport(host) - if not host or \ - (not port and socket.gethostbyname(host) in self.get_names()): - return urllib.addinfourl(open(localfile, 'rb'), - headers, 'file:'+file) - except OSError, msg: - # urllib2 users shouldn't expect OSErrors coming from urlopen() - raise URLError(msg) - raise URLError('file not on local host') diff --git a/samples-and-tests/i-am-a-developer/mechanize/_firefox3cookiejar.py b/samples-and-tests/i-am-a-developer/mechanize/_firefox3cookiejar.py deleted file mode 100644 index 34fe9799ed..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_firefox3cookiejar.py +++ /dev/null @@ -1,249 +0,0 @@ -"""Firefox 3 "cookies.sqlite" cookie persistence. 
- -Copyright 2008 John J Lee - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import logging -import time -import sqlite3 - -from _clientcookie import CookieJar, Cookie, MappingIterator -from _util import isstringlike, experimental -debug = logging.getLogger("mechanize.cookies").debug - - -class Firefox3CookieJar(CookieJar): - - """Firefox 3 cookie jar. - - The cookies are stored in Firefox 3's "cookies.sqlite" format. - - Constructor arguments: - - filename: filename of cookies.sqlite (typically found at the top level - of a firefox profile directory) - autoconnect: as a convenience, connect to the SQLite cookies database at - Firefox3CookieJar construction time (default True) - policy: an object satisfying the mechanize.CookiePolicy interface - - Note that this is NOT a FileCookieJar, and there are no .load(), - .save() or .restore() methods. The database is in sync with the - cookiejar object's state after each public method call. - - Following Firefox's own behaviour, session cookies are never saved to - the database. - - The file is created, and an sqlite database written to it, if it does - not already exist. The moz_cookies database table is created if it does - not already exist. 
- """ - - # XXX - # handle DatabaseError exceptions - # add a FileCookieJar (explicit .save() / .revert() / .load() methods) - - def __init__(self, filename, autoconnect=True, policy=None): - experimental("Firefox3CookieJar is experimental code") - CookieJar.__init__(self, policy) - if filename is not None and not isstringlike(filename): - raise ValueError("filename must be string-like") - self.filename = filename - self._conn = None - if autoconnect: - self.connect() - - def connect(self): - self._conn = sqlite3.connect(self.filename) - self._conn.isolation_level = "DEFERRED" - self._create_table_if_necessary() - - def close(self): - self._conn.close() - - def _transaction(self, func): - try: - cur = self._conn.cursor() - try: - result = func(cur) - finally: - cur.close() - except: - self._conn.rollback() - raise - else: - self._conn.commit() - return result - - def _execute(self, query, params=()): - return self._transaction(lambda cur: cur.execute(query, params)) - - def _query(self, query, params=()): - # XXX should we bother with a transaction? 
- cur = self._conn.cursor() - try: - cur.execute(query, params) - for row in cur.fetchall(): - yield row - finally: - cur.close() - - def _create_table_if_necessary(self): - self._execute("""\ -CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT, - value TEXT, host TEXT, path TEXT,expiry INTEGER, - lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""") - - def _cookie_from_row(self, row): - (pk, name, value, domain, path, expires, - last_accessed, secure, http_only) = row - - version = 0 - domain = domain.encode("ascii", "ignore") - path = path.encode("ascii", "ignore") - name = name.encode("ascii", "ignore") - value = value.encode("ascii", "ignore") - secure = bool(secure) - - # last_accessed isn't a cookie attribute, so isn't added to rest - rest = {} - if http_only: - rest["HttpOnly"] = None - - if name == "": - name = value - value = None - - initial_dot = domain.startswith(".") - domain_specified = initial_dot - - discard = False - if expires == "": - expires = None - discard = True - - return Cookie(version, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - rest) - - def clear(self, domain=None, path=None, name=None): - CookieJar.clear(self, domain, path, name) - where_parts = [] - sql_params = [] - if domain is not None: - where_parts.append("host = ?") - sql_params.append(domain) - if path is not None: - where_parts.append("path = ?") - sql_params.append(path) - if name is not None: - where_parts.append("name = ?") - sql_params.append(name) - where = " AND ".join(where_parts) - if where: - where = " WHERE " + where - def clear(cur): - cur.execute("DELETE FROM moz_cookies%s" % where, - tuple(sql_params)) - self._transaction(clear) - - def _row_from_cookie(self, cookie, cur): - expires = cookie.expires - if cookie.discard: - expires = "" - - domain = unicode(cookie.domain) - path = unicode(cookie.path) - name = unicode(cookie.name) - value = 
unicode(cookie.value) - secure = bool(int(cookie.secure)) - - if value is None: - value = name - name = "" - - last_accessed = int(time.time()) - http_only = cookie.has_nonstandard_attr("HttpOnly") - - query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""") - pk = query.fetchone()[0] - if pk is None: - pk = 1 - - return (pk, name, value, domain, path, expires, - last_accessed, secure, http_only) - - def set_cookie(self, cookie): - if cookie.discard: - CookieJar.set_cookie(self, cookie) - return - - def set_cookie(cur): - # XXX - # is this RFC 2965-correct? - # could this do an UPDATE instead? - row = self._row_from_cookie(cookie, cur) - name, unused, domain, path = row[1:5] - cur.execute("""\ -DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""", - (domain, path, name)) - cur.execute("""\ -INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) -""", row) - self._transaction(set_cookie) - - def __iter__(self): - # session (non-persistent) cookies - for cookie in MappingIterator(self._cookies): - yield cookie - # persistent cookies - for row in self._query("""\ -SELECT * FROM moz_cookies ORDER BY name, path, host"""): - yield self._cookie_from_row(row) - - def _cookies_for_request(self, request): - session_cookies = CookieJar._cookies_for_request(self, request) - def get_cookies(cur): - query = cur.execute("SELECT host from moz_cookies") - domains = [row[0] for row in query.fetchmany()] - cookies = [] - for domain in domains: - cookies += self._persistent_cookies_for_domain(domain, - request, cur) - return cookies - persistent_coookies = self._transaction(get_cookies) - return session_cookies + persistent_coookies - - def _persistent_cookies_for_domain(self, domain, request, cur): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - debug("Checking %s for cookies to return", domain) - query = cur.execute("""\ -SELECT * from moz_cookies WHERE host = ? 
ORDER BY path""", - (domain,)) - cookies = [self._cookie_from_row(row) for row in query.fetchmany()] - last_path = None - r = [] - for cookie in cookies: - if (cookie.path != last_path and - not self._policy.path_return_ok(cookie.path, request)): - last_path = cookie.path - continue - if not self._policy.return_ok(cookie, request): - debug(" not returning cookie") - continue - debug(" it's a match") - r.append(cookie) - return r diff --git a/samples-and-tests/i-am-a-developer/mechanize/_gzip.py b/samples-and-tests/i-am-a-developer/mechanize/_gzip.py deleted file mode 100644 index 26c2743832..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_gzip.py +++ /dev/null @@ -1,103 +0,0 @@ -import urllib2 -from cStringIO import StringIO -import _response - -# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library -class GzipConsumer: - - def __init__(self, consumer): - self.__consumer = consumer - self.__decoder = None - self.__data = "" - - def __getattr__(self, key): - return getattr(self.__consumer, key) - - def feed(self, data): - if self.__decoder is None: - # check if we have a full gzip header - data = self.__data + data - try: - i = 10 - flag = ord(data[3]) - if flag & 4: # extra - x = ord(data[i]) + 256*ord(data[i+1]) - i = i + 2 + x - if flag & 8: # filename - while ord(data[i]): - i = i + 1 - i = i + 1 - if flag & 16: # comment - while ord(data[i]): - i = i + 1 - i = i + 1 - if flag & 2: # crc - i = i + 2 - if len(data) < i: - raise IndexError("not enough data") - if data[:3] != "\x1f\x8b\x08": - raise IOError("invalid gzip data") - data = data[i:] - except IndexError: - self.__data = data - return # need more data - import zlib - self.__data = "" - self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS) - data = self.__decoder.decompress(data) - if data: - self.__consumer.feed(data) - - def close(self): - if self.__decoder: - data = self.__decoder.flush() - if data: - self.__consumer.feed(data) - self.__consumer.close() - - -# 
-------------------------------------------------------------------- - -# the rest of this module is John Lee's stupid code, not -# Fredrik's nice code :-) - -class stupid_gzip_consumer: - def __init__(self): self.data = [] - def feed(self, data): self.data.append(data) - -class stupid_gzip_wrapper(_response.closeable_response): - def __init__(self, response): - self._response = response - - c = stupid_gzip_consumer() - gzc = GzipConsumer(c) - gzc.feed(response.read()) - self.__data = StringIO("".join(c.data)) - - def read(self, size=-1): - return self.__data.read(size) - def readline(self, size=-1): - return self.__data.readline(size) - def readlines(self, sizehint=-1): - return self.__data.readlines(sizehint) - - def __getattr__(self, name): - # delegate unknown methods/attributes - return getattr(self._response, name) - -class HTTPGzipProcessor(urllib2.BaseHandler): - handler_order = 200 # response processing before HTTPEquivProcessor - - def http_request(self, request): - request.add_header("Accept-Encoding", "gzip") - return request - - def http_response(self, request, response): - # post-process response - enc_hdrs = response.info().getheaders("Content-encoding") - for enc_hdr in enc_hdrs: - if ("gzip" in enc_hdr) or ("compress" in enc_hdr): - return stupid_gzip_wrapper(response) - return response - - https_response = http_response diff --git a/samples-and-tests/i-am-a-developer/mechanize/_headersutil.py b/samples-and-tests/i-am-a-developer/mechanize/_headersutil.py deleted file mode 100644 index 49ba5de022..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_headersutil.py +++ /dev/null @@ -1,232 +0,0 @@ -"""Utility functions for HTTP header value parsing and construction. - -Copyright 1997-1998, Gisle Aas -Copyright 2002-2006, John J. Lee - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). 
- -""" - -import os, re -from types import StringType -from types import UnicodeType -STRING_TYPES = StringType, UnicodeType - -from _util import http2time -import _rfc3986 - -def is_html(ct_headers, url, allow_xhtml=False): - """ - ct_headers: Sequence of Content-Type headers - url: Response URL - - """ - if not ct_headers: - # guess - ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1] - html_exts = [".htm", ".html"] - if allow_xhtml: - html_exts += [".xhtml"] - return ext in html_exts - # use first header - ct = split_header_words(ct_headers)[0][0][0] - html_types = ["text/html"] - if allow_xhtml: - html_types += [ - "text/xhtml", "text/xml", - "application/xml", "application/xhtml+xml", - ] - return ct in html_types - -def unmatched(match): - """Return unmatched part of re.Match object.""" - start, end = match.span(0) - return match.string[:start]+match.string[end:] - -token_re = re.compile(r"^\s*([^=\s;,]+)") -quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") -value_re = re.compile(r"^\s*=\s*([^\s;,]*)") -escape_re = re.compile(r"\\(.)") -def split_header_words(header_values): - r"""Parse header values into a list of lists containing key,value pairs. - - The function knows how to deal with ",", ";" and "=" as well as quoted - values after "=". A list of space separated tokens are parsed as if they - were separated by ";". - - If the header_values passed as argument contains multiple values, then they - are treated as if they were a single value separated by comma ",". - - This means that this function is useful for parsing header fields that - follow this syntax (BNF as from the HTTP/1.1 specification, but we relax - the requirement for tokens). - - headers = #header - header = (token | parameter) *( [";"] (token | parameter)) - - token = 1* - separators = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" 
| "=" - | "{" | "}" | SP | HT - - quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - qdtext = > - quoted-pair = "\" CHAR - - parameter = attribute "=" value - attribute = token - value = token | quoted-string - - Each header is represented by a list of key/value pairs. The value for a - simple token (not part of a parameter) is None. Syntactically incorrect - headers will not necessarily be parsed as you would want. - - This is easier to describe with some examples: - - >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) - [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] - >>> split_header_words(['text/html; charset="iso-8859-1"']) - [[('text/html', None), ('charset', 'iso-8859-1')]] - >>> split_header_words([r'Basic realm="\"foo\bar\""']) - [[('Basic', None), ('realm', '"foobar"')]] - - """ - assert type(header_values) not in STRING_TYPES - result = [] - for text in header_values: - orig_text = text - pairs = [] - while text: - m = token_re.search(text) - if m: - text = unmatched(m) - name = m.group(1) - m = quoted_value_re.search(text) - if m: # quoted value - text = unmatched(m) - value = m.group(1) - value = escape_re.sub(r"\1", value) - else: - m = value_re.search(text) - if m: # unquoted value - text = unmatched(m) - value = m.group(1) - value = value.rstrip() - else: - # no value, a lone token - value = None - pairs.append((name, value)) - elif text.lstrip().startswith(","): - # concatenated headers, as per RFC 2616 section 4.2 - text = text.lstrip()[1:] - if pairs: result.append(pairs) - pairs = [] - else: - # skip junk - non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) - assert nr_junk_chars > 0, ( - "split_header_words bug: '%s', '%s', %s" % - (orig_text, text, pairs)) - text = non_junk - if pairs: result.append(pairs) - return result - -join_escape_re = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse of the conversion done by split_header_words. 
- - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. - - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) - 'text/plain; charset="iso-8859/1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) - 'text/plain, charset="iso-8859/1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = join_escape_re.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - if k is None: # Netscape cookies may have no name - k = v - else: - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) - -def strip_quotes(text): - if text.startswith('"'): - text = text[1:] - if text.endswith('"'): - text = text[:-1] - return text - -def parse_ns_headers(ns_headers): - """Ad-hoc parser for Netscape protocol cookie-attributes. - - The old Netscape cookie format for Set-Cookie can for instance contain - an unquoted "," in the expires field, so we have to use this ad-hoc - parser instead of split_header_words. - - XXX This may not make the best possible effort to parse all the crap - that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient - parser is probably better, so could do worse than following that if - this ever gives any trouble. - - Currently, this is also used for parsing RFC 2109 cookies. 
- - """ - known_attrs = ("expires", "domain", "path", "secure", - # RFC 2109 attrs (may turn up in Netscape cookies, too) - "version", "port", "max-age") - - result = [] - for ns_header in ns_headers: - pairs = [] - version_set = False - params = re.split(r";\s*", ns_header) - for ii in range(len(params)): - param = params[ii] - param = param.rstrip() - if param == "": continue - if "=" not in param: - k, v = param, None - else: - k, v = re.split(r"\s*=\s*", param, 1) - k = k.lstrip() - if ii != 0: - lc = k.lower() - if lc in known_attrs: - k = lc - if k == "version": - # This is an RFC 2109 cookie. - v = strip_quotes(v) - version_set = True - if k == "expires": - # convert expires date to seconds since epoch - v = http2time(strip_quotes(v)) # None if invalid - pairs.append((k, v)) - - if pairs: - if not version_set: - pairs.append(("version", "0")) - result.append(pairs) - - return result - - -def _test(): - import doctest, _headersutil - return doctest.testmod(_headersutil) - -if __name__ == "__main__": - _test() diff --git a/samples-and-tests/i-am-a-developer/mechanize/_html.py b/samples-and-tests/i-am-a-developer/mechanize/_html.py deleted file mode 100644 index 5da0815380..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_html.py +++ /dev/null @@ -1,631 +0,0 @@ -"""HTML handling. - -Copyright 2003-2006 John J. Lee - -This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). 
- -""" - -import re, copy, htmlentitydefs -import sgmllib, ClientForm - -import _request -from _headersutil import split_header_words, is_html as _is_html -import _rfc3986 - -DEFAULT_ENCODING = "latin-1" - -COMPRESS_RE = re.compile(r"\s+") - - -# the base classe is purely for backwards compatibility -class ParseError(ClientForm.ParseError): pass - - -class CachingGeneratorFunction(object): - """Caching wrapper around a no-arguments iterable.""" - - def __init__(self, iterable): - self._cache = [] - # wrap iterable to make it non-restartable (otherwise, repeated - # __call__ would give incorrect results) - self._iterator = iter(iterable) - - def __call__(self): - cache = self._cache - for item in cache: - yield item - for item in self._iterator: - cache.append(item) - yield item - - -class EncodingFinder: - def __init__(self, default_encoding): - self._default_encoding = default_encoding - def encoding(self, response): - # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV - # headers may be in the response. HTTP-EQUIV headers come last, - # so try in order from first to last. 
- for ct in response.info().getheaders("content-type"): - for k, v in split_header_words([ct])[0]: - if k == "charset": - return v - return self._default_encoding - -class ResponseTypeFinder: - def __init__(self, allow_xhtml): - self._allow_xhtml = allow_xhtml - def is_html(self, response, encoding): - ct_hdrs = response.info().getheaders("content-type") - url = response.geturl() - # XXX encoding - return _is_html(ct_hdrs, url, self._allow_xhtml) - - -# idea for this argument-processing trick is from Peter Otten -class Args: - def __init__(self, args_map): - self.dictionary = dict(args_map) - def __getattr__(self, key): - try: - return self.dictionary[key] - except KeyError: - return getattr(self.__class__, key) - -def form_parser_args( - select_default=False, - form_parser_class=None, - request_class=None, - backwards_compat=False, - ): - return Args(locals()) - - -class Link: - def __init__(self, base_url, url, text, tag, attrs): - assert None not in [url, tag, attrs] - self.base_url = base_url - self.absolute_url = _rfc3986.urljoin(base_url, url) - self.url, self.text, self.tag, self.attrs = url, text, tag, attrs - def __cmp__(self, other): - try: - for name in "url", "text", "tag", "attrs": - if getattr(self, name) != getattr(other, name): - return -1 - except AttributeError: - return -1 - return 0 - def __repr__(self): - return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % ( - self.base_url, self.url, self.text, self.tag, self.attrs) - - -class LinksFactory: - - def __init__(self, - link_parser_class=None, - link_class=Link, - urltags=None, - ): - import _pullparser - if link_parser_class is None: - link_parser_class = _pullparser.TolerantPullParser - self.link_parser_class = link_parser_class - self.link_class = link_class - if urltags is None: - urltags = { - "a": "href", - "area": "href", - "frame": "src", - "iframe": "src", - } - self.urltags = urltags - self._response = None - self._encoding = None - - def set_response(self, response, base_url, 
encoding): - self._response = response - self._encoding = encoding - self._base_url = base_url - - def links(self): - """Return an iterator that provides links of the document.""" - response = self._response - encoding = self._encoding - base_url = self._base_url - p = self.link_parser_class(response, encoding=encoding) - - try: - for token in p.tags(*(self.urltags.keys()+["base"])): - if token.type == "endtag": - continue - if token.data == "base": - base_href = dict(token.attrs).get("href") - if base_href is not None: - base_url = base_href - continue - attrs = dict(token.attrs) - tag = token.data - name = attrs.get("name") - text = None - # XXX use attr_encoding for ref'd doc if that doc does not - # provide one by other means - #attr_encoding = attrs.get("charset") - url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL? - if not url: - # Probably an link or . - # For our purposes a link is something with a URL, so - # ignore this. - continue - - url = _rfc3986.clean_url(url, encoding) - if tag == "a": - if token.type != "startendtag": - # hmm, this'd break if end tag is missing - text = p.get_compressed_text(("endtag", tag)) - # but this doesn't work for eg. - # Andy - #text = p.get_compressed_text() - - yield Link(base_url, url, text, tag, token.attrs) - except sgmllib.SGMLParseError, exc: - raise ParseError(exc) - -class FormsFactory: - - """Makes a sequence of objects satisfying ClientForm.HTMLForm interface. - - After calling .forms(), the .global_form attribute is a form object - containing all controls not a descendant of any FORM element. - - For constructor argument docs, see ClientForm.ParseResponse - argument docs. 
- - """ - - def __init__(self, - select_default=False, - form_parser_class=None, - request_class=None, - backwards_compat=False, - ): - import ClientForm - self.select_default = select_default - if form_parser_class is None: - form_parser_class = ClientForm.FormParser - self.form_parser_class = form_parser_class - if request_class is None: - request_class = _request.Request - self.request_class = request_class - self.backwards_compat = backwards_compat - self._response = None - self.encoding = None - self.global_form = None - - def set_response(self, response, encoding): - self._response = response - self.encoding = encoding - self.global_form = None - - def forms(self): - import ClientForm - encoding = self.encoding - try: - forms = ClientForm.ParseResponseEx( - self._response, - select_default=self.select_default, - form_parser_class=self.form_parser_class, - request_class=self.request_class, - encoding=encoding, - _urljoin=_rfc3986.urljoin, - _urlparse=_rfc3986.urlsplit, - _urlunparse=_rfc3986.urlunsplit, - ) - except ClientForm.ParseError, exc: - raise ParseError(exc) - self.global_form = forms[0] - return forms[1:] - -class TitleFactory: - def __init__(self): - self._response = self._encoding = None - - def set_response(self, response, encoding): - self._response = response - self._encoding = encoding - - def _get_title_text(self, parser): - import _pullparser - text = [] - tok = None - while 1: - try: - tok = parser.get_token() - except _pullparser.NoMoreTokensError: - break - if tok.type == "data": - text.append(str(tok)) - elif tok.type == "entityref": - t = unescape("&%s;" % tok.data, - parser._entitydefs, parser.encoding) - text.append(t) - elif tok.type == "charref": - t = unescape_charref(tok.data, parser.encoding) - text.append(t) - elif tok.type in ["starttag", "endtag", "startendtag"]: - tag_name = tok.data - if tok.type == "endtag" and tag_name == "title": - break - text.append(str(tok)) - return COMPRESS_RE.sub(" ", "".join(text).strip()) - - def 
title(self): - import _pullparser - p = _pullparser.TolerantPullParser( - self._response, encoding=self._encoding) - try: - try: - p.get_tag("title") - except _pullparser.NoMoreTokensError: - return None - else: - return self._get_title_text(p) - except sgmllib.SGMLParseError, exc: - raise ParseError(exc) - - -def unescape(data, entities, encoding): - if data is None or "&" not in data: - return data - - def replace_entities(match): - ent = match.group() - if ent[1] == "#": - return unescape_charref(ent[2:-1], encoding) - - repl = entities.get(ent[1:-1]) - if repl is not None: - repl = unichr(repl) - if type(repl) != type(""): - try: - repl = repl.encode(encoding) - except UnicodeError: - repl = ent - else: - repl = ent - return repl - - return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data) - -def unescape_charref(data, encoding): - name, base = data, 10 - if name.startswith("x"): - name, base= name[1:], 16 - uc = unichr(int(name, base)) - if encoding is None: - return uc - else: - try: - repl = uc.encode(encoding) - except UnicodeError: - repl = "&#%s;" % data - return repl - - -# bizarre import gymnastics for bundled BeautifulSoup -import _beautifulsoup -import ClientForm -RobustFormParser, NestingRobustFormParser = ClientForm._create_bs_classes( - _beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup - ) -# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-( -sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]") - -class MechanizeBs(_beautifulsoup.BeautifulSoup): - _entitydefs = htmlentitydefs.name2codepoint - # don't want the magic Microsoft-char workaround - PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda(x):x.group(1) + ' />'), - (re.compile(']*)>'), - lambda(x):'') - ] - - def __init__(self, encoding, text=None, avoidParserProblems=True, - initialTextIsEverything=True): - self._encoding = encoding - _beautifulsoup.BeautifulSoup.__init__( - self, text, avoidParserProblems, initialTextIsEverything) - - def 
handle_charref(self, ref): - t = unescape("&#%s;"%ref, self._entitydefs, self._encoding) - self.handle_data(t) - def handle_entityref(self, ref): - t = unescape("&%s;"%ref, self._entitydefs, self._encoding) - self.handle_data(t) - def unescape_attrs(self, attrs): - escaped_attrs = [] - for key, val in attrs: - val = unescape(val, self._entitydefs, self._encoding) - escaped_attrs.append((key, val)) - return escaped_attrs - -class RobustLinksFactory: - - compress_re = COMPRESS_RE - - def __init__(self, - link_parser_class=None, - link_class=Link, - urltags=None, - ): - if link_parser_class is None: - link_parser_class = MechanizeBs - self.link_parser_class = link_parser_class - self.link_class = link_class - if urltags is None: - urltags = { - "a": "href", - "area": "href", - "frame": "src", - "iframe": "src", - } - self.urltags = urltags - self._bs = None - self._encoding = None - self._base_url = None - - def set_soup(self, soup, base_url, encoding): - self._bs = soup - self._base_url = base_url - self._encoding = encoding - - def links(self): - import _beautifulsoup - bs = self._bs - base_url = self._base_url - encoding = self._encoding - gen = bs.recursiveChildGenerator() - for ch in bs.recursiveChildGenerator(): - if (isinstance(ch, _beautifulsoup.Tag) and - ch.name in self.urltags.keys()+["base"]): - link = ch - attrs = bs.unescape_attrs(link.attrs) - attrs_dict = dict(attrs) - if link.name == "base": - base_href = attrs_dict.get("href") - if base_href is not None: - base_url = base_href - continue - url_attr = self.urltags[link.name] - url = attrs_dict.get(url_attr) - if not url: - continue - url = _rfc3986.clean_url(url, encoding) - text = link.fetchText(lambda t: True) - if not text: - # follow _pullparser's weird behaviour rigidly - if link.name == "a": - text = "" - else: - text = None - else: - text = self.compress_re.sub(" ", " ".join(text).strip()) - yield Link(base_url, url, text, link.name, attrs) - - -class RobustFormsFactory(FormsFactory): - def 
__init__(self, *args, **kwds): - args = form_parser_args(*args, **kwds) - if args.form_parser_class is None: - args.form_parser_class = RobustFormParser - FormsFactory.__init__(self, **args.dictionary) - - def set_response(self, response, encoding): - self._response = response - self.encoding = encoding - - -class RobustTitleFactory: - def __init__(self): - self._bs = self._encoding = None - - def set_soup(self, soup, encoding): - self._bs = soup - self._encoding = encoding - - def title(self): - import _beautifulsoup - title = self._bs.first("title") - if title == _beautifulsoup.Null: - return None - else: - inner_html = "".join([str(node) for node in title.contents]) - return COMPRESS_RE.sub(" ", inner_html.strip()) - - -class Factory: - """Factory for forms, links, etc. - - This interface may expand in future. - - Public methods: - - set_request_class(request_class) - set_response(response) - forms() - links() - - Public attributes: - - Note that accessing these attributes may raise ParseError. - - encoding: string specifying the encoding of response if it contains a text - document (this value is left unspecified for documents that do not have - an encoding, e.g. an image file) - is_html: true if response contains an HTML document (XHTML may be - regarded as HTML too) - title: page title, or None if no title or not HTML - global_form: form object containing all controls that are not descendants - of any FORM element, or None if the forms_factory does not support - supplying a global form - - """ - - LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"] - - def __init__(self, forms_factory, links_factory, title_factory, - encoding_finder=EncodingFinder(DEFAULT_ENCODING), - response_type_finder=ResponseTypeFinder(allow_xhtml=False), - ): - """ - - Pass keyword arguments only. - - default_encoding: character encoding to use if encoding cannot be - determined (or guessed) from the response. 
You should turn on - HTTP-EQUIV handling if you want the best chance of getting this right - without resorting to this default. The default value of this - parameter (currently latin-1) may change in future. - - """ - self._forms_factory = forms_factory - self._links_factory = links_factory - self._title_factory = title_factory - self._encoding_finder = encoding_finder - self._response_type_finder = response_type_finder - - self.set_response(None) - - def set_request_class(self, request_class): - """Set urllib2.Request class. - - ClientForm.HTMLForm instances returned by .forms() will return - instances of this class when .click()ed. - - """ - self._forms_factory.request_class = request_class - - def set_response(self, response): - """Set response. - - The response must either be None or implement the same interface as - objects returned by urllib2.urlopen(). - - """ - self._response = response - self._forms_genf = self._links_genf = None - self._get_title = None - for name in self.LAZY_ATTRS: - try: - delattr(self, name) - except AttributeError: - pass - - def __getattr__(self, name): - if name not in self.LAZY_ATTRS: - return getattr(self.__class__, name) - - if name == "encoding": - self.encoding = self._encoding_finder.encoding( - copy.copy(self._response)) - return self.encoding - elif name == "is_html": - self.is_html = self._response_type_finder.is_html( - copy.copy(self._response), self.encoding) - return self.is_html - elif name == "title": - if self.is_html: - self.title = self._title_factory.title() - else: - self.title = None - return self.title - elif name == "global_form": - self.forms() - return self.global_form - - def forms(self): - """Return iterable over ClientForm.HTMLForm-like objects. - - Raises mechanize.ParseError on failure. 
- """ - # this implementation sets .global_form as a side-effect, for benefit - # of __getattr__ impl - if self._forms_genf is None: - try: - self._forms_genf = CachingGeneratorFunction( - self._forms_factory.forms()) - except: # XXXX define exception! - self.set_response(self._response) - raise - self.global_form = getattr( - self._forms_factory, "global_form", None) - return self._forms_genf() - - def links(self): - """Return iterable over mechanize.Link-like objects. - - Raises mechanize.ParseError on failure. - """ - if self._links_genf is None: - try: - self._links_genf = CachingGeneratorFunction( - self._links_factory.links()) - except: # XXXX define exception! - self.set_response(self._response) - raise - return self._links_genf() - -class DefaultFactory(Factory): - """Based on sgmllib.""" - def __init__(self, i_want_broken_xhtml_support=False): - Factory.__init__( - self, - forms_factory=FormsFactory(), - links_factory=LinksFactory(), - title_factory=TitleFactory(), - response_type_finder=ResponseTypeFinder( - allow_xhtml=i_want_broken_xhtml_support), - ) - - def set_response(self, response): - Factory.set_response(self, response) - if response is not None: - self._forms_factory.set_response( - copy.copy(response), self.encoding) - self._links_factory.set_response( - copy.copy(response), response.geturl(), self.encoding) - self._title_factory.set_response( - copy.copy(response), self.encoding) - -class RobustFactory(Factory): - """Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is - DefaultFactory. 
- - """ - def __init__(self, i_want_broken_xhtml_support=False, - soup_class=None): - Factory.__init__( - self, - forms_factory=RobustFormsFactory(), - links_factory=RobustLinksFactory(), - title_factory=RobustTitleFactory(), - response_type_finder=ResponseTypeFinder( - allow_xhtml=i_want_broken_xhtml_support), - ) - if soup_class is None: - soup_class = MechanizeBs - self._soup_class = soup_class - - def set_response(self, response): - Factory.set_response(self, response) - if response is not None: - data = response.read() - soup = self._soup_class(self.encoding, data) - self._forms_factory.set_response( - copy.copy(response), self.encoding) - self._links_factory.set_soup( - soup, response.geturl(), self.encoding) - self._title_factory.set_soup(soup, self.encoding) diff --git a/samples-and-tests/i-am-a-developer/mechanize/_http.py b/samples-and-tests/i-am-a-developer/mechanize/_http.py deleted file mode 100644 index 1b80e2babd..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_http.py +++ /dev/null @@ -1,758 +0,0 @@ -"""HTTP related handlers. - -Note that some other HTTP handlers live in more specific modules: _auth.py, -_gzip.py, etc. - - -Copyright 2002-2006 John J Lee - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). 
- -""" - -import time, htmlentitydefs, logging, socket, \ - urllib2, urllib, httplib, sgmllib -from urllib2 import URLError, HTTPError, BaseHandler -from cStringIO import StringIO - -from _clientcookie import CookieJar -from _headersutil import is_html -from _html import unescape, unescape_charref -from _request import Request -from _response import closeable_response, response_seek_wrapper -import _rfc3986 -import _sockettimeout - -debug = logging.getLogger("mechanize").debug -debug_robots = logging.getLogger("mechanize.robots").debug - -# monkeypatch urllib2.HTTPError to show URL -## def urllib2_str(self): -## return 'HTTP Error %s: %s (%s)' % ( -## self.code, self.msg, self.geturl()) -## urllib2.HTTPError.__str__ = urllib2_str - - -CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes -DEFAULT_ENCODING = 'latin-1' - - -try: - socket._fileobject("fake socket", close=True) -except TypeError: - # python <= 2.4 - create_readline_wrapper = socket._fileobject -else: - def create_readline_wrapper(fh): - return socket._fileobject(fh, close=True) - - -# This adds "refresh" to the list of redirectables and provides a redirection -# algorithm that doesn't go into a loop in the presence of cookies -# (Python 2.4 has this new algorithm, 2.3 doesn't). -class HTTPRedirectHandler(BaseHandler): - # maximum number of redirections to any single URL - # this is needed because of the state that cookies introduce - max_repeats = 4 - # maximum total number of redirections (regardless of URL) before - # assuming we're in a loop - max_redirections = 10 - - # Implementation notes: - - # To avoid the server sending us into an infinite loop, the request - # object needs to track what URLs we have already seen. Do this by - # adding a handler-specific attribute to the Request object. The value - # of the dict is used to count the number of times the same URL has - # been visited. 
This is needed because visiting the same URL twice - # does not necessarily imply a loop, thanks to state introduced by - # cookies. - - # Always unhandled redirection codes: - # 300 Multiple Choices: should not handle this here. - # 304 Not Modified: no need to handle here: only of interest to caches - # that do conditional GETs - # 305 Use Proxy: probably not worth dealing with here - # 306 Unused: what was this for in the previous versions of protocol?? - - def redirect_request(self, newurl, req, fp, code, msg, headers): - """Return a Request or None in response to a redirect. - - This is called by the http_error_30x methods when a redirection - response is received. If a redirection should take place, return a - new Request to allow http_error_30x to perform the redirect; - otherwise, return None to indicate that an HTTPError should be - raised. - - """ - if code in (301, 302, 303, "refresh") or \ - (code == 307 and not req.has_data()): - # Strictly (according to RFC 2616), 301 or 302 in response to - # a POST MUST NOT cause a redirection without confirmation - # from the user (of urllib2, in this case). In practice, - # essentially all clients do redirect in this case, so we do - # the same. - # XXX really refresh redirections should be visiting; tricky to - # fix, so this will wait until post-stable release - new = Request(newurl, - headers=req.headers, - origin_req_host=req.get_origin_req_host(), - unverifiable=True, - visit=False, - ) - new._origin_req = getattr(req, "_origin_req", req) - return new - else: - raise HTTPError(req.get_full_url(), code, msg, headers, fp) - - def http_error_302(self, req, fp, code, msg, headers): - # Some servers (incorrectly) return multiple Location headers - # (so probably same goes for URI). Use first header. 
- if headers.has_key('location'): - newurl = headers.getheaders('location')[0] - elif headers.has_key('uri'): - newurl = headers.getheaders('uri')[0] - else: - return - newurl = _rfc3986.clean_url(newurl, "latin-1") - newurl = _rfc3986.urljoin(req.get_full_url(), newurl) - - # XXX Probably want to forget about the state of the current - # request, although that might interact poorly with other - # handlers that also use handler-specific request attributes - new = self.redirect_request(newurl, req, fp, code, msg, headers) - if new is None: - return - - # loop detection - # .redirect_dict has a key url if url was previously visited. - if hasattr(req, 'redirect_dict'): - visited = new.redirect_dict = req.redirect_dict - if (visited.get(newurl, 0) >= self.max_repeats or - len(visited) >= self.max_redirections): - raise HTTPError(req.get_full_url(), code, - self.inf_msg + msg, headers, fp) - else: - visited = new.redirect_dict = req.redirect_dict = {} - visited[newurl] = visited.get(newurl, 0) + 1 - - # Don't close the fp until we are sure that we won't use it - # with HTTPError. - fp.read() - fp.close() - - return self.parent.open(new) - - http_error_301 = http_error_303 = http_error_307 = http_error_302 - http_error_refresh = http_error_302 - - inf_msg = "The HTTP server returned a redirect error that would " \ - "lead to an infinite loop.\n" \ - "The last 30x error message was:\n" - - -# XXX would self.reset() work, instead of raising this exception? 
-class EndOfHeadError(Exception): pass -class AbstractHeadParser: - # only these elements are allowed in or before HEAD of document - head_elems = ("html", "head", - "title", "base", - "script", "style", "meta", "link", "object") - _entitydefs = htmlentitydefs.name2codepoint - _encoding = DEFAULT_ENCODING - - def __init__(self): - self.http_equiv = [] - - def start_meta(self, attrs): - http_equiv = content = None - for key, value in attrs: - if key == "http-equiv": - http_equiv = self.unescape_attr_if_required(value) - elif key == "content": - content = self.unescape_attr_if_required(value) - if http_equiv is not None and content is not None: - self.http_equiv.append((http_equiv, content)) - - def end_head(self): - raise EndOfHeadError() - - def handle_entityref(self, name): - #debug("%s", name) - self.handle_data(unescape( - '&%s;' % name, self._entitydefs, self._encoding)) - - def handle_charref(self, name): - #debug("%s", name) - self.handle_data(unescape_charref(name, self._encoding)) - - def unescape_attr(self, name): - #debug("%s", name) - return unescape(name, self._entitydefs, self._encoding) - - def unescape_attrs(self, attrs): - #debug("%s", attrs) - escaped_attrs = {} - for key, val in attrs.items(): - escaped_attrs[key] = self.unescape_attr(val) - return escaped_attrs - - def unknown_entityref(self, ref): - self.handle_data("&%s;" % ref) - - def unknown_charref(self, ref): - self.handle_data("&#%s;" % ref) - - -try: - import HTMLParser -except ImportError: - pass -else: - class XHTMLCompatibleHeadParser(AbstractHeadParser, - HTMLParser.HTMLParser): - def __init__(self): - HTMLParser.HTMLParser.__init__(self) - AbstractHeadParser.__init__(self) - - def handle_starttag(self, tag, attrs): - if tag not in self.head_elems: - raise EndOfHeadError() - try: - method = getattr(self, 'start_' + tag) - except AttributeError: - try: - method = getattr(self, 'do_' + tag) - except AttributeError: - pass # unknown tag - else: - method(attrs) - else: - method(attrs) - 
- def handle_endtag(self, tag): - if tag not in self.head_elems: - raise EndOfHeadError() - try: - method = getattr(self, 'end_' + tag) - except AttributeError: - pass # unknown tag - else: - method() - - def unescape(self, name): - # Use the entitydefs passed into constructor, not - # HTMLParser.HTMLParser's entitydefs. - return self.unescape_attr(name) - - def unescape_attr_if_required(self, name): - return name # HTMLParser.HTMLParser already did it - -class HeadParser(AbstractHeadParser, sgmllib.SGMLParser): - - def _not_called(self): - assert False - - def __init__(self): - sgmllib.SGMLParser.__init__(self) - AbstractHeadParser.__init__(self) - - def handle_starttag(self, tag, method, attrs): - if tag not in self.head_elems: - raise EndOfHeadError() - if tag == "meta": - method(attrs) - - def unknown_starttag(self, tag, attrs): - self.handle_starttag(tag, self._not_called, attrs) - - def handle_endtag(self, tag, method): - if tag in self.head_elems: - method() - else: - raise EndOfHeadError() - - def unescape_attr_if_required(self, name): - return self.unescape_attr(name) - -def parse_head(fileobj, parser): - """Return a list of key, value pairs.""" - while 1: - data = fileobj.read(CHUNK) - try: - parser.feed(data) - except EndOfHeadError: - break - if len(data) != CHUNK: - # this should only happen if there is no HTML body, or if - # CHUNK is big - break - return parser.http_equiv - -class HTTPEquivProcessor(BaseHandler): - """Append META HTTP-EQUIV headers to regular HTTP headers.""" - - handler_order = 300 # before handlers that look at HTTP headers - - def __init__(self, head_parser_class=HeadParser, - i_want_broken_xhtml_support=False, - ): - self.head_parser_class = head_parser_class - self._allow_xhtml = i_want_broken_xhtml_support - - def http_response(self, request, response): - if not hasattr(response, "seek"): - response = response_seek_wrapper(response) - http_message = response.info() - url = response.geturl() - ct_hdrs = 
http_message.getheaders("content-type") - if is_html(ct_hdrs, url, self._allow_xhtml): - try: - try: - html_headers = parse_head(response, - self.head_parser_class()) - finally: - response.seek(0) - except (HTMLParser.HTMLParseError, - sgmllib.SGMLParseError): - pass - else: - for hdr, val in html_headers: - # add a header - http_message.dict[hdr.lower()] = val - text = hdr + ": " + val - for line in text.split("\n"): - http_message.headers.append(line + "\n") - return response - - https_response = http_response - -class HTTPCookieProcessor(BaseHandler): - """Handle HTTP cookies. - - Public attributes: - - cookiejar: CookieJar instance - - """ - def __init__(self, cookiejar=None): - if cookiejar is None: - cookiejar = CookieJar() - self.cookiejar = cookiejar - - def http_request(self, request): - self.cookiejar.add_cookie_header(request) - return request - - def http_response(self, request, response): - self.cookiejar.extract_cookies(response, request) - return response - - https_request = http_request - https_response = http_response - -try: - import robotparser -except ImportError: - pass -else: - class MechanizeRobotFileParser(robotparser.RobotFileParser): - - def __init__(self, url='', opener=None): - robotparser.RobotFileParser.__init__(self, url) - self._opener = opener - self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT - - def set_opener(self, opener=None): - import _opener - if opener is None: - opener = _opener.OpenerDirector() - self._opener = opener - - def set_timeout(self, timeout): - self._timeout = timeout - - def read(self): - """Reads the robots.txt URL and feeds it to the parser.""" - if self._opener is None: - self.set_opener() - req = Request(self.url, unverifiable=True, visit=False, - timeout=self._timeout) - try: - f = self._opener.open(req) - except HTTPError, f: - pass - except (IOError, socket.error, OSError), exc: - debug_robots("ignoring error opening %r: %s" % - (self.url, exc)) - return - lines = [] - line = f.readline() - while 
line: - lines.append(line.strip()) - line = f.readline() - status = f.code - if status == 401 or status == 403: - self.disallow_all = True - debug_robots("disallow all") - elif status >= 400: - self.allow_all = True - debug_robots("allow all") - elif status == 200 and lines: - debug_robots("parse lines") - self.parse(lines) - - class RobotExclusionError(urllib2.HTTPError): - def __init__(self, request, *args): - apply(urllib2.HTTPError.__init__, (self,)+args) - self.request = request - - class HTTPRobotRulesProcessor(BaseHandler): - # before redirections, after everything else - handler_order = 800 - - try: - from httplib import HTTPMessage - except: - from mimetools import Message - http_response_class = Message - else: - http_response_class = HTTPMessage - - def __init__(self, rfp_class=MechanizeRobotFileParser): - self.rfp_class = rfp_class - self.rfp = None - self._host = None - - def http_request(self, request): - scheme = request.get_type() - if scheme not in ["http", "https"]: - # robots exclusion only applies to HTTP - return request - - if request.get_selector() == "/robots.txt": - # /robots.txt is always OK to fetch - return request - - host = request.get_host() - - # robots.txt requests don't need to be allowed by robots.txt :-) - origin_req = getattr(request, "_origin_req", None) - if (origin_req is not None and - origin_req.get_selector() == "/robots.txt" and - origin_req.get_host() == host - ): - return request - - if host != self._host: - self.rfp = self.rfp_class() - try: - self.rfp.set_opener(self.parent) - except AttributeError: - debug("%r instance does not support set_opener" % - self.rfp.__class__) - self.rfp.set_url(scheme+"://"+host+"/robots.txt") - self.rfp.set_timeout(request.timeout) - self.rfp.read() - self._host = host - - ua = request.get_header("User-agent", "") - if self.rfp.can_fetch(ua, request.get_full_url()): - return request - else: - # XXX This should really have raised URLError. Too late now... 
- msg = "request disallowed by robots.txt" - raise RobotExclusionError( - request, - request.get_full_url(), - 403, msg, - self.http_response_class(StringIO()), StringIO(msg)) - - https_request = http_request - -class HTTPRefererProcessor(BaseHandler): - """Add Referer header to requests. - - This only makes sense if you use each RefererProcessor for a single - chain of requests only (so, for example, if you use a single - HTTPRefererProcessor to fetch a series of URLs extracted from a single - page, this will break). - - There's a proper implementation of this in mechanize.Browser. - - """ - def __init__(self): - self.referer = None - - def http_request(self, request): - if ((self.referer is not None) and - not request.has_header("Referer")): - request.add_unredirected_header("Referer", self.referer) - return request - - def http_response(self, request, response): - self.referer = response.geturl() - return response - - https_request = http_request - https_response = http_response - - -def clean_refresh_url(url): - # e.g. 
Firefox 1.5 does (something like) this - if ((url.startswith('"') and url.endswith('"')) or - (url.startswith("'") and url.endswith("'"))): - url = url[1:-1] - return _rfc3986.clean_url(url, "latin-1") # XXX encoding - -def parse_refresh_header(refresh): - """ - >>> parse_refresh_header("1; url=http://example.com/") - (1.0, 'http://example.com/') - >>> parse_refresh_header("1; url='http://example.com/'") - (1.0, 'http://example.com/') - >>> parse_refresh_header("1") - (1.0, None) - >>> parse_refresh_header("blah") - Traceback (most recent call last): - ValueError: invalid literal for float(): blah - - """ - - ii = refresh.find(";") - if ii != -1: - pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:] - jj = newurl_spec.find("=") - key = None - if jj != -1: - key, newurl = newurl_spec[:jj], newurl_spec[jj+1:] - newurl = clean_refresh_url(newurl) - if key is None or key.strip().lower() != "url": - raise ValueError() - else: - pause, newurl = float(refresh), None - return pause, newurl - -class HTTPRefreshProcessor(BaseHandler): - """Perform HTTP Refresh redirections. - - Note that if a non-200 HTTP code has occurred (for example, a 30x - redirect), this processor will do nothing. - - By default, only zero-time Refresh headers are redirected. Use the - max_time attribute / constructor argument to allow Refresh with longer - pauses. Use the honor_time attribute / constructor argument to control - whether the requested pause is honoured (with a time.sleep()) or - skipped in favour of immediate redirection. 
- - Public attributes: - - max_time: see above - honor_time: see above - - """ - handler_order = 1000 - - def __init__(self, max_time=0, honor_time=True): - self.max_time = max_time - self.honor_time = honor_time - self._sleep = time.sleep - - def http_response(self, request, response): - code, msg, hdrs = response.code, response.msg, response.info() - - if code == 200 and hdrs.has_key("refresh"): - refresh = hdrs.getheaders("refresh")[0] - try: - pause, newurl = parse_refresh_header(refresh) - except ValueError: - debug("bad Refresh header: %r" % refresh) - return response - - if newurl is None: - newurl = response.geturl() - if (self.max_time is None) or (pause <= self.max_time): - if pause > 1E-3 and self.honor_time: - self._sleep(pause) - hdrs["location"] = newurl - # hardcoded http is NOT a bug - response = self.parent.error( - "http", request, response, - "refresh", msg, hdrs) - else: - debug("Refresh header ignored: %r" % refresh) - - return response - - https_response = http_response - -class HTTPErrorProcessor(BaseHandler): - """Process HTTP error responses. - - The purpose of this handler is to to allow other response processors a - look-in by removing the call to parent.error() from - AbstractHTTPHandler. - - For non-200 error codes, this just passes the job on to the - Handler._error_ methods, via the OpenerDirector.error - method. Eventually, urllib2.HTTPDefaultErrorHandler will raise an - HTTPError if no other handler handles the error. 
- - """ - handler_order = 1000 # after all other processors - - def http_response(self, request, response): - code, msg, hdrs = response.code, response.msg, response.info() - - if code != 200: - # hardcoded http is NOT a bug - response = self.parent.error( - "http", request, response, code, msg, hdrs) - - return response - - https_response = http_response - - -class HTTPDefaultErrorHandler(BaseHandler): - def http_error_default(self, req, fp, code, msg, hdrs): - # why these error methods took the code, msg, headers args in the first - # place rather than a response object, I don't know, but to avoid - # multiple wrapping, we're discarding them - - if isinstance(fp, urllib2.HTTPError): - response = fp - else: - response = urllib2.HTTPError( - req.get_full_url(), code, msg, hdrs, fp) - assert code == response.code - assert msg == response.msg - assert hdrs == response.hdrs - raise response - - -class AbstractHTTPHandler(BaseHandler): - - def __init__(self, debuglevel=0): - self._debuglevel = debuglevel - - def set_http_debuglevel(self, level): - self._debuglevel = level - - def do_request_(self, request): - host = request.get_host() - if not host: - raise URLError('no host given') - - if request.has_data(): # POST - data = request.get_data() - if not request.has_header('Content-type'): - request.add_unredirected_header( - 'Content-type', - 'application/x-www-form-urlencoded') - if not request.has_header('Content-length'): - request.add_unredirected_header( - 'Content-length', '%d' % len(data)) - - scheme, sel = urllib.splittype(request.get_selector()) - sel_host, sel_path = urllib.splithost(sel) - if not request.has_header('Host'): - request.add_unredirected_header('Host', sel_host or host) - for name, value in self.parent.addheaders: - name = name.capitalize() - if not request.has_header(name): - request.add_unredirected_header(name, value) - - return request - - def do_open(self, http_class, req): - """Return an addinfourl object for the request, using http_class. 
- - http_class must implement the HTTPConnection API from httplib. - The addinfourl return value is a file-like object. It also - has methods and attributes including: - - info(): return a mimetools.Message object for the headers - - geturl(): return the original request URL - - code: HTTP status code - """ - host_port = req.get_host() - if not host_port: - raise URLError('no host given') - - try: - h = http_class(host_port, timeout=req.timeout) - except TypeError: - # Python < 2.6, no per-connection timeout support - h = http_class(host_port) - h.set_debuglevel(self._debuglevel) - - headers = dict(req.headers) - headers.update(req.unredirected_hdrs) - # We want to make an HTTP/1.1 request, but the addinfourl - # class isn't prepared to deal with a persistent connection. - # It will try to read all remaining data from the socket, - # which will block while the server waits for the next request. - # So make sure the connection gets closed after the (only) - # request. - headers["Connection"] = "close" - headers = dict( - [(name.title(), val) for name, val in headers.items()]) - try: - h.request(req.get_method(), req.get_selector(), req.data, headers) - r = h.getresponse() - except socket.error, err: # XXX what error? - raise URLError(err) - - # Pick apart the HTTPResponse object to get the addinfourl - # object initialized properly. - - # Wrap the HTTPResponse object in socket's file object adapter - # for Windows. That adapter calls recv(), so delegate recv() - # to read(). This weird wrapping allows the returned object to - # have readline() and readlines() methods. - - # XXX It might be better to extract the read buffering code - # out of socket._fileobject() and into a base class. 
- - r.recv = r.read - fp = create_readline_wrapper(r) - - resp = closeable_response(fp, r.msg, req.get_full_url(), - r.status, r.reason) - return resp - - -class HTTPHandler(AbstractHTTPHandler): - def http_open(self, req): - return self.do_open(httplib.HTTPConnection, req) - - http_request = AbstractHTTPHandler.do_request_ - -if hasattr(httplib, 'HTTPS'): - - class HTTPSConnectionFactory: - def __init__(self, key_file, cert_file): - self._key_file = key_file - self._cert_file = cert_file - def __call__(self, hostport): - return httplib.HTTPSConnection( - hostport, - key_file=self._key_file, cert_file=self._cert_file) - - class HTTPSHandler(AbstractHTTPHandler): - def __init__(self, client_cert_manager=None): - AbstractHTTPHandler.__init__(self) - self.client_cert_manager = client_cert_manager - - def https_open(self, req): - if self.client_cert_manager is not None: - key_file, cert_file = self.client_cert_manager.find_key_cert( - req.get_full_url()) - conn_factory = HTTPSConnectionFactory(key_file, cert_file) - else: - conn_factory = httplib.HTTPSConnection - return self.do_open(conn_factory, req) - - https_request = AbstractHTTPHandler.do_request_ diff --git a/samples-and-tests/i-am-a-developer/mechanize/_lwpcookiejar.py b/samples-and-tests/i-am-a-developer/mechanize/_lwpcookiejar.py deleted file mode 100644 index f8d49cf2d4..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_lwpcookiejar.py +++ /dev/null @@ -1,185 +0,0 @@ -"""Load / save to libwww-perl (LWP) format files. - -Actually, the format is slightly extended from that used by LWP's -(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information -not recorded by LWP. - -It uses the version string "2.0", though really there isn't an LWP Cookies -2.0 format. This indicates that there is extra information in here -(domain_dot and port_spec) while still being compatible with libwww-perl, -I hope. 
- -Copyright 2002-2006 John J Lee -Copyright 1997-1999 Gisle Aas (original libwww-perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import time, re, logging - -from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \ - MISSING_FILENAME_TEXT, LoadError -from _headersutil import join_header_words, split_header_words -from _util import iso2time, time2isoz - -debug = logging.getLogger("mechanize").debug - - -def lwp_cookie_str(cookie): - """Return string representation of Cookie in an the LWP cookie file format. - - Actually, the format is extended a bit -- see module docstring. - - """ - h = [(cookie.name, cookie.value), - ("path", cookie.path), - ("domain", cookie.domain)] - if cookie.port is not None: h.append(("port", cookie.port)) - if cookie.path_specified: h.append(("path_spec", None)) - if cookie.port_specified: h.append(("port_spec", None)) - if cookie.domain_initial_dot: h.append(("domain_dot", None)) - if cookie.secure: h.append(("secure", None)) - if cookie.expires: h.append(("expires", - time2isoz(float(cookie.expires)))) - if cookie.discard: h.append(("discard", None)) - if cookie.comment: h.append(("comment", cookie.comment)) - if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - if cookie.rfc2109: h.append(("rfc2109", None)) - - keys = cookie.nonstandard_attr_keys() - keys.sort() - for k in keys: - h.append((k, str(cookie.get_nonstandard_attr(k)))) - - h.append(("version", str(cookie.version))) - - return join_header_words([h]) - -class LWPCookieJar(FileCookieJar): - """ - The LWPCookieJar saves a sequence of"Set-Cookie3" lines. - "Set-Cookie3" is the format used by the libwww-perl libary, not known - to be compatible with any browser, but which is easy to read and - doesn't lose information about RFC 2965 cookies. 
- - Additional methods - - as_lwp_str(ignore_discard=True, ignore_expired=True) - - """ - - magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" - - def as_lwp_str(self, ignore_discard=True, ignore_expires=True): - """Return cookies as a string of "\n"-separated "Set-Cookie3" headers. - - ignore_discard and ignore_expires: see docstring for FileCookieJar.save - - """ - now = time.time() - r = [] - for cookie in self: - if not ignore_discard and cookie.discard: - debug(" Not saving %s: marked for discard", cookie.name) - continue - if not ignore_expires and cookie.is_expired(now): - debug(" Not saving %s: expired", cookie.name) - continue - r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) - return "\n".join(r+[""]) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - debug("Saving LWP cookies file") - # There really isn't an LWP Cookies 2.0 format, but this indicates - # that there is extra information in here (domain_dot and - # port_spec) while still being compatible with libwww-perl, I hope. 
- f.write("#LWP-Cookies-2.0\n") - f.write(self.as_lwp_str(ignore_discard, ignore_expires)) - finally: - f.close() - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - magic = f.readline() - if not re.search(self.magic_re, magic): - msg = "%s does not seem to contain cookies" % filename - raise LoadError(msg) - - now = time.time() - - header = "Set-Cookie3:" - boolean_attrs = ("port_spec", "path_spec", "domain_dot", - "secure", "discard", "rfc2109") - value_attrs = ("version", - "port", "path", "domain", - "expires", - "comment", "commenturl") - - try: - while 1: - line = f.readline() - if line == "": break - if not line.startswith(header): - continue - line = line[len(header):].strip() - - for data in split_header_words([line]): - name, value = data[0] - standard = {} - rest = {} - for k in boolean_attrs: - standard[k] = False - for k, v in data[1:]: - if k is not None: - lc = k.lower() - else: - lc = None - # don't lose case distinction for unknown fields - if (lc in value_attrs) or (lc in boolean_attrs): - k = lc - if k in boolean_attrs: - if v is None: v = True - standard[k] = v - elif k in value_attrs: - standard[k] = v - else: - rest[k] = v - - h = standard.get - expires = h("expires") - discard = h("discard") - if expires is not None: - expires = iso2time(expires) - if expires is None: - discard = True - domain = h("domain") - domain_specified = domain.startswith(".") - c = Cookie(h("version"), name, value, - h("port"), h("port_spec"), - domain, domain_specified, h("domain_dot"), - h("path"), h("path_spec"), - h("secure"), - expires, - discard, - h("comment"), - h("commenturl"), - rest, - h("rfc2109"), - ) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - except: - reraise_unmasked_exceptions((IOError,)) - raise LoadError("invalid Set-Cookie3 format file %s" % filename) - diff --git a/samples-and-tests/i-am-a-developer/mechanize/_mechanize.py 
b/samples-and-tests/i-am-a-developer/mechanize/_mechanize.py deleted file mode 100644 index ad729c9d0c..0000000000 --- a/samples-and-tests/i-am-a-developer/mechanize/_mechanize.py +++ /dev/null @@ -1,676 +0,0 @@ -"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize. - -Copyright 2003-2006 John J. Lee -Copyright 2003 Andy Lester (original Perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -import urllib2, copy, re, os, urllib - - -from _html import DefaultFactory -import _response -import _request -import _rfc3986 -import _sockettimeout -from _useragent import UserAgentBase - -__version__ = (0, 1, 11, None, None) # 0.1.11 - -class BrowserStateError(Exception): pass -class LinkNotFoundError(Exception): pass -class FormNotFoundError(Exception): pass - - -def sanepathname2url(path): - urlpath = urllib.pathname2url(path) - if os.name == "nt" and urlpath.startswith("///"): - urlpath = urlpath[2:] - # XXX don't ask me about the mac... - return urlpath - - -class History: - """ - - Though this will become public, the implied interface is not yet stable. - - """ - def __init__(self): - self._history = [] # LIFO - def add(self, request, response): - self._history.append((request, response)) - def back(self, n, _response): - response = _response # XXX move Browser._response into this class? - while n > 0 or response is None: - try: - request, response = self._history.pop() - except IndexError: - raise BrowserStateError("already at start of history") - n -= 1 - return request, response - def clear(self): - del self._history[:] - def close(self): - for request, response in self._history: - if response is not None: - response.close() - del self._history[:] - - -class HTTPRefererProcessor(urllib2.BaseHandler): - def http_request(self, request): - # See RFC 2616 14.36. 
The only times we know the source of the - # request URI has a URI associated with it are redirect, and - # Browser.click() / Browser.submit() / Browser.follow_link(). - # Otherwise, it's the user's job to add any Referer header before - # .open()ing. - if hasattr(request, "redirect_dict"): - request = self.parent._add_referer_header( - request, origin_request=False) - return request - - https_request = http_request - - -class Browser(UserAgentBase): - """Browser-like class with support for history, forms and links. - - BrowserStateError is raised whenever the browser is in the wrong state to - complete the requested operation - eg., when .back() is called when the - browser history is empty, or when .follow_link() is called when the current - response does not contain HTML data. - - Public attributes: - - request: current request (mechanize.Request or urllib2.Request) - form: currently selected form (see .select_form()) - - """ - - handler_classes = copy.copy(UserAgentBase.handler_classes) - handler_classes["_referer"] = HTTPRefererProcessor - default_features = copy.copy(UserAgentBase.default_features) - default_features.append("_referer") - - def __init__(self, - factory=None, - history=None, - request_class=None, - ): - """ - - Only named arguments should be passed to this constructor. - - factory: object implementing the mechanize.Factory interface. - history: object implementing the mechanize.History interface. Note - this interface is still experimental and may change in future. - request_class: Request class to use. Defaults to mechanize.Request - by default for Pythons older than 2.4, urllib2.Request otherwise. - - The Factory and History objects passed in are 'owned' by the Browser, - so they should not be shared across Browsers. In particular, - factory.set_response() should not be called except by the owning - Browser itself. - - Note that the supplied factory's request_class is overridden by this - constructor, to ensure only one Request class is used. 
- - """ - self._handle_referer = True - - if history is None: - history = History() - self._history = history - - if request_class is None: - if not hasattr(urllib2.Request, "add_unredirected_header"): - request_class = _request.Request - else: - request_class = urllib2.Request # Python >= 2.4 - - if factory is None: - factory = DefaultFactory() - factory.set_request_class(request_class) - self._factory = factory - self.request_class = request_class - - self.request = None - self._set_response(None, False) - - # do this last to avoid __getattr__ problems - UserAgentBase.__init__(self) - - def close(self): - UserAgentBase.close(self) - if self._response is not None: - self._response.close() - if self._history is not None: - self._history.close() - self._history = None - - # make use after .close easy to spot - self.form = None - self.request = self._response = None - self.request = self.response = self.set_response = None - self.geturl = self.reload = self.back = None - self.clear_history = self.set_cookie = self.links = self.forms = None - self.viewing_html = self.encoding = self.title = None - self.select_form = self.click = self.submit = self.click_link = None - self.follow_link = self.find_link = None - - def set_handle_referer(self, handle): - """Set whether to add Referer header to each request.""" - self._set_handler("_referer", handle) - self._handle_referer = bool(handle) - - def _add_referer_header(self, request, origin_request=True): - if self.request is None: - return request - scheme = request.get_type() - original_scheme = self.request.get_type() - if scheme not in ["http", "https"]: - return request - if not origin_request and not self.request.has_header("Referer"): - return request - - if (self._handle_referer and - original_scheme in ["http", "https"] and - not (original_scheme == "https" and scheme != "https")): - # strip URL fragment (RFC 2616 14.36) - parts = _rfc3986.urlsplit(self.request.get_full_url()) - parts = parts[:-1]+(None,) - referer = 
_rfc3986.urlunsplit(parts) - request.add_unredirected_header("Referer", referer) - return request - - def open_novisit(self, url, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - """Open a URL without visiting it. - - Browser state (including request, response, history, forms and links) - is left unchanged by calling this function. - - The interface is the same as for .open(). - - This is useful for things like fetching images. - - See also .retrieve(). - - """ - return self._mech_open(url, data, visit=False, timeout=timeout) - - def open(self, url, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - return self._mech_open(url, data, timeout=timeout) - - def _mech_open(self, url, data=None, update_history=True, visit=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - try: - url.get_full_url - except AttributeError: - # string URL -- convert to absolute URL if required - scheme, authority = _rfc3986.urlsplit(url)[:2] - if scheme is None: - # relative URL - if self._response is None: - raise BrowserStateError( - "can't fetch relative reference: " - "not viewing any document") - url = _rfc3986.urljoin(self._response.geturl(), url) - - request = self._request(url, data, visit, timeout) - visit = request.visit - if visit is None: - visit = True - - if visit: - self._visit_request(request, update_history) - - success = True - try: - response = UserAgentBase.open(self, request, data) - except urllib2.HTTPError, error: - success = False - if error.fp is None: # not a response - raise - response = error -## except (IOError, socket.error, OSError), error: -## # Yes, urllib2 really does raise all these :-(( -## # See test_urllib2.py for examples of socket.gaierror and OSError, -## # plus note that FTPHandler raises IOError. -## # XXX I don't seem to have an example of exactly socket.error being -## # raised, only socket.gaierror... 
-## # I don't want to start fixing these here, though, since this is a -## # subclass of OpenerDirector, and it would break old code. Even in -## # Python core, a fix would need some backwards-compat. hack to be -## # acceptable. -## raise - - if visit: - self._set_response(response, False) - response = copy.copy(self._response) - elif response is not None: - response = _response.upgrade_response(response) - - if not success: - raise response - return response - - def __str__(self): - text = [] - text.append("<%s " % self.__class__.__name__) - if self._response: - text.append("visiting %s" % self._response.geturl()) - else: - text.append("(not visiting a URL)") - if self.form: - text.append("\n selected form:\n %s\n" % str(self.form)) - text.append(">") - return "".join(text) - - def response(self): - """Return a copy of the current response. - - The returned object has the same interface as the object returned by - .open() (or urllib2.urlopen()). - - """ - return copy.copy(self._response) - - def open_local_file(self, filename): - path = sanepathname2url(os.path.abspath(filename)) - url = 'file://'+path - return self.open(url) - - def set_response(self, response): - """Replace current response with (a copy of) response. - - response may be None. - - This is intended mostly for HTML-preprocessing. - """ - self._set_response(response, True) - - def _set_response(self, response, close_current): - # sanity check, necessary but far from sufficient - if not (response is None or - (hasattr(response, "info") and hasattr(response, "geturl") and - hasattr(response, "read") - ) - ): - raise ValueError("not a response object") - - self.form = None - if response is not None: - response = _response.upgrade_response(response) - if close_current and self._response is not None: - self._response.close() - self._response = response - self._factory.set_response(response) - - def visit_response(self, response, request=None): - """Visit the response, as if it had been .open()ed. 
- - Unlike .set_response(), this updates history rather than replacing the - current response. - """ - if request is None: - request = _request.Request(response.geturl()) - self._visit_request(request, True) - self._set_response(response, False) - - def _visit_request(self, request, update_history): - if self._response is not None: - self._response.close() - if self.request is not None and update_history: - self._history.add(self.request, self._response) - self._response = None - # we want self.request to be assigned even if UserAgentBase.open - # fails - self.request = request - - def geturl(self): - """Get URL of current document.""" - if self._response is None: - raise BrowserStateError("not viewing any document") - return self._response.geturl() - - def reload(self): - """Reload current document, and return response object.""" - if self.request is None: - raise BrowserStateError("no URL has yet been .open()ed") - if self._response is not None: - self._response.close() - return self._mech_open(self.request, update_history=False) - - def back(self, n=1): - """Go back n steps in history, and return response object. - - n: go back this number of steps (default 1 step) - - """ - if self._response is not None: - self._response.close() - self.request, response = self._history.back(n, self._response) - self.set_response(response) - if not response.read_complete: - return self.reload() - return copy.copy(response) - - def clear_history(self): - self._history.clear() - - def set_cookie(self, cookie_string): - """Request to set a cookie. - - Note that it is NOT necessary to call this method under ordinary - circumstances: cookie handling is normally entirely automatic. The - intended use case is rather to simulate the setting of a cookie by - client script in a web page (e.g. JavaScript). In that case, use of - this method is necessary because mechanize currently does not support - JavaScript, VBScript, etc. 
- - The cookie is added in the same way as if it had arrived with the - current response, as a result of the current request. This means that, - for example, if it is not appropriate to set the cookie based on the - current request, no cookie will be set. - - The cookie will be returned automatically with subsequent responses - made by the Browser instance whenever that's appropriate. - - cookie_string should be a valid value of the Set-Cookie header. - - For example: - - browser.set_cookie( - "sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT") - - Currently, this method does not allow for adding RFC 2986 cookies. - This limitation will be lifted if anybody requests it. - - """ - if self._response is None: - raise BrowserStateError("not viewing any document") - if self.request.get_type() not in ["http", "https"]: - raise BrowserStateError("can't set cookie for non-HTTP/HTTPS " - "transactions") - cookiejar = self._ua_handlers["_cookies"].cookiejar - response = self.response() # copy - headers = response.info() - headers["Set-cookie"] = cookie_string - cookiejar.extract_cookies(response, self.request) - - def links(self, **kwds): - """Return iterable over links (mechanize.Link objects).""" - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - links = self._factory.links() - if kwds: - return self._filter_links(links, **kwds) - else: - return links - - def forms(self): - """Return iterable over forms. - - The returned form objects implement the ClientForm.HTMLForm interface. - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - return self._factory.forms() - - def global_form(self): - """Return the global form object, or None if the factory implementation - did not supply one. - - The "global" form object contains all controls that are not descendants - of any FORM element. - - The returned form object implements the ClientForm.HTMLForm interface. 
- - This is a separate method since the global form is not regarded as part - of the sequence of forms in the document -- mostly for - backwards-compatibility. - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - return self._factory.global_form - - def viewing_html(self): - """Return whether the current response contains HTML data.""" - if self._response is None: - raise BrowserStateError("not viewing any document") - return self._factory.is_html - - def encoding(self): - if self._response is None: - raise BrowserStateError("not viewing any document") - return self._factory.encoding - - def title(self): - r"""Return title, or None if there is no title element in the document. - - Treatment of any tag children of attempts to follow Firefox and IE - (currently, tags are preserved). - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - return self._factory.title - - def select_form(self, name=None, predicate=None, nr=None): - """Select an HTML form for input. - - This is a bit like giving a form the "input focus" in a browser. - - If a form is selected, the Browser object supports the HTMLForm - interface, so you can call methods like .set_value(), .set(), and - .click(). - - Another way to select a form is to assign to the .form attribute. The - form assigned should be one of the objects returned by the .forms() - method. - - At least one of the name, predicate and nr arguments must be supplied. - If no matching form is found, mechanize.FormNotFoundError is raised. - - If name is specified, then the form must have the indicated name. - - If predicate is specified, then the form must match that function. The - predicate function is passed the HTMLForm as its single argument, and - should return a boolean value indicating whether the form matched. - - nr, if supplied, is the sequence number of the form (where 0 is the - first). 
Note that control 0 is the first form matching all the other - arguments (if supplied); it is not necessarily the first control in the - form. The "global form" (consisting of all form controls not contained - in any FORM element) is considered not to be part of this sequence and - to have no name, so will not be matched unless both name and nr are - None. - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - if (name is None) and (predicate is None) and (nr is None): - raise ValueError( - "at least one argument must be supplied to specify form") - - global_form = self._factory.global_form - if nr is None and name is None and \ - predicate is not None and predicate(global_form): - self.form = global_form - return - - orig_nr = nr - for form in self.forms(): - if name is not None and name != form.name: - continue - if predicate is not None and not predicate(form): - continue - if nr: - nr -= 1 - continue - self.form = form - break # success - else: - # failure - description = [] - if name is not None: description.append("name '%s'" % name) - if predicate is not None: - description.append("predicate %s" % predicate) - if orig_nr is not None: description.append("nr %d" % orig_nr) - description = ", ".join(description) - raise FormNotFoundError("no form matching "+description) - - def click(self, *args, **kwds): - """See ClientForm.HTMLForm.click for documentation.""" - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - request = self.form.click(*args, **kwds) - return self._add_referer_header(request) - - def submit(self, *args, **kwds): - """Submit current form. - - Arguments are as for ClientForm.HTMLForm.click(). - - Return value is same as for Browser.open(). - - """ - return self.open(self.click(*args, **kwds)) - - def click_link(self, link=None, **kwds): - """Find a link and return a Request object for it. - - Arguments are as for .find_link(), except that a link may be supplied - as the first argument. 
- - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - if not link: - link = self.find_link(**kwds) - else: - if kwds: - raise ValueError( - "either pass a Link, or keyword arguments, not both") - request = self.request_class(link.absolute_url) - return self._add_referer_header(request) - - def follow_link(self, link=None, **kwds): - """Find a link and .open() it. - - Arguments are as for .click_link(). - - Return value is same as for Browser.open(). - - """ - return self.open(self.click_link(link, **kwds)) - - def find_link(self, **kwds): - """Find a link in current page. - - Links are returned as mechanize.Link objects. - - # Return third link that .search()-matches the regexp "python" - # (by ".search()-matches", I mean that the regular expression method - # .search() is used, rather than .match()). - find_link(text_regex=re.compile("python"), nr=2) - - # Return first http link in the current page that points to somewhere - # on python.org whose link text (after tags have been removed) is - # exactly "monty python". - find_link(text="monty python", - url_regex=re.compile("http.*python.org")) - - # Return first link with exactly three HTML attributes. - find_link(predicate=lambda link: len(link.attrs) == 3) - - Links include anchors (), image maps (), and frames (, -