diff --git a/botocore/endpoint.py b/botocore/endpoint.py index db6179b2a1..0cadf514f9 100644 --- a/botocore/endpoint.py +++ b/botocore/endpoint.py @@ -20,8 +20,6 @@ from botocore.vendored.requests.sessions import Session from botocore.vendored.requests.utils import get_environ_proxies -import botocore.response -import botocore.exceptions from botocore.exceptions import UnknownEndpointError from botocore.awsrequest import AWSRequest from botocore.compat import urljoin @@ -66,6 +64,11 @@ def convert_to_response_dict(http_response, operation_model): return response_dict +class PreserveAuthSession(Session): + def rebuild_auth(self, prepared_request, response): + pass + + class Endpoint(object): """ Represents an endpoint for a particular service in a specific @@ -89,7 +92,7 @@ def __init__(self, region_name, host, user_agent, if proxies is None: proxies = {} self.proxies = proxies - self.http_session = Session() + self.http_session = PreserveAuthSession() self.timeout = timeout self._lock = threading.Lock() if response_parser_factory is None: @@ -111,9 +114,8 @@ def create_request(self, params, operation_model=None): endpoint_prefix=self._endpoint_prefix, op_name=operation_model.name) self._event_emitter.emit(event_name, request=request, - operation_name=operation_model.name) - prepared_request = self.prepare_request( - request) + operation_name=operation_model.name) + prepared_request = self.prepare_request(request) return prepared_request def _create_request_object(self, request_dict): diff --git a/botocore/vendored/requests/__init__.py b/botocore/vendored/requests/__init__.py index 793ac22aab..ac2b06c86c 100644 --- a/botocore/vendored/requests/__init__.py +++ b/botocore/vendored/requests/__init__.py @@ -13,7 +13,7 @@ usage: >>> import requests - >>> r = requests.get('http://python.org') + >>> r = requests.get('https://www.python.org') >>> r.status_code 200 >>> 'Python is a programming language' in r.content @@ -22,7 +22,7 @@ ... or POST: >>> payload = dict(key1='value1', key2='value2') - >>> r = requests.post("http://httpbin.org/post", data=payload) + >>> r = requests.post('http://httpbin.org/post', data=payload) >>> print(r.text) { ... @@ -36,17 +36,17 @@ The other HTTP methods are supported - see `requests.api`. Full documentation is at . -:copyright: (c) 2013 by Kenneth Reitz. +:copyright: (c) 2014 by Kenneth Reitz. :license: Apache 2.0, see LICENSE for more details. """ __title__ = 'requests' -__version__ = '2.0.1' -__build__ = 0x020001 +__version__ = '2.5.1' +__build__ = 0x020501 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2013 Kenneth Reitz' +__copyright__ = 'Copyright 2014 Kenneth Reitz' # Attempt to enable urllib3's SNI support, if possible try: diff --git a/botocore/vendored/requests/adapters.py b/botocore/vendored/requests/adapters.py index 0adca690cd..c892853b29 100644 --- a/botocore/vendored/requests/adapters.py +++ b/botocore/vendored/requests/adapters.py @@ -11,20 +11,25 @@ import socket from .models import Response +from .packages.urllib3 import Retry from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .packages.urllib3.util import Timeout as TimeoutSauce -from .compat import urlparse, basestring, urldefrag, unquote +from .compat import urlparse, basestring from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - except_on_missing_scheme, get_auth_from_url) + prepend_scheme_if_needed, get_auth_from_url, urldefragauth) from .structures import CaseInsensitiveDict -from .packages.urllib3.exceptions import MaxRetryError -from .packages.urllib3.exceptions import TimeoutError -from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import ConnectTimeoutError from .packages.urllib3.exceptions import HTTPError as _HTTPError +from .packages.urllib3.exceptions import MaxRetryError from .packages.urllib3.exceptions import ProxyError as _ProxyError +from .packages.urllib3.exceptions import ProtocolError +from .packages.urllib3.exceptions import ReadTimeoutError +from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import ResponseError from .cookies import extract_cookies_to_jar -from .exceptions import ConnectionError, Timeout, SSLError, ProxyError +from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, + ProxyError, RetryError) from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False @@ -55,14 +60,20 @@ class HTTPAdapter(BaseAdapter): :param pool_connections: The number of urllib3 connection pools to cache. :param pool_maxsize: The maximum number of connections to save in the pool. - :param max_retries: The maximum number of retries each connection should attempt. + :param int max_retries: The maximum number of retries each connection + should attempt. Note, this applies only to failed DNS lookups, socket + connections and connection timeouts, never to requests where data has + made it to the server. By default, Requests does not retry failed + connections. If you need granular control over the conditions under + which we retry a request, import urllib3's ``Retry`` class and pass + that instead. :param pool_block: Whether the connection pool should block for connections. Usage:: >>> import requests >>> s = requests.Session() - >>> a = requests.adapters.HTTPAdapter() + >>> a = requests.adapters.HTTPAdapter(max_retries=3) >>> s.mount('http://', a) """ __attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize', @@ -71,7 +82,10 @@ class HTTPAdapter(BaseAdapter): def __init__(self, pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES, pool_block=DEFAULT_POOLBLOCK): - self.max_retries = max_retries + if max_retries == DEFAULT_RETRIES: + self.max_retries = Retry(0, read=False) + else: + self.max_retries = Retry.from_int(max_retries) self.config = {} self.proxy_manager = {} @@ -88,20 +102,28 @@ def __getstate__(self): self.__attrs__) def __setstate__(self, state): + # Can't handle by adding 'proxy_manager' to self.__attrs__ because + # because self.poolmanager uses a lambda function, which isn't pickleable. + self.proxy_manager = {} + self.config = {} + for attr, value in state.items(): setattr(self, attr, value) self.init_poolmanager(self._pool_connections, self._pool_maxsize, block=self._pool_block) - def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK): - """Initializes a urllib3 PoolManager. This method should not be called - from user code, and is only exposed for use when subclassing the + def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs): + """Initializes a urllib3 PoolManager. + + This method should not be called from user code, and is only + exposed for use when subclassing the :class:`HTTPAdapter `. :param connections: The number of urllib3 connection pools to cache. :param maxsize: The maximum number of connections to save in the pool. :param block: Block when no free connections are available. + :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager. """ # save these values for pickling self._pool_connections = connections @@ -109,7 +131,30 @@ def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK): self._pool_block = block self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize, - block=block) + block=block, strict=True, **pool_kwargs) + + def proxy_manager_for(self, proxy, **proxy_kwargs): + """Return urllib3 ProxyManager for the given proxy. + + This method should not be called from user code, and is only + exposed for use when subclassing the + :class:`HTTPAdapter `. + + :param proxy: The proxy to return a urllib3 ProxyManager for. + :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager. + :returns: ProxyManager + """ + if not proxy in self.proxy_manager: + proxy_headers = self.proxy_headers(proxy) + self.proxy_manager[proxy] = proxy_from_url( + proxy, + proxy_headers=proxy_headers, + num_pools=self._pool_connections, + maxsize=self._pool_maxsize, + block=self._pool_block, + **proxy_kwargs) + + return self.proxy_manager[proxy] def cert_verify(self, conn, url, verify, cert): """Verify a SSL certificate. This method should not be called from user @@ -196,17 +241,14 @@ def get_connection(self, url, proxies=None): proxy = proxies.get(urlparse(url.lower()).scheme) if proxy: - except_on_missing_scheme(proxy) - proxy_headers = self.proxy_headers(proxy) - - if not proxy in self.proxy_manager: - self.proxy_manager[proxy] = proxy_from_url( - proxy, - proxy_headers=proxy_headers) - - conn = self.proxy_manager[proxy].connection_from_url(url) + proxy = prepend_scheme_if_needed(proxy, 'http') + proxy_manager = self.proxy_manager_for(proxy) + conn = proxy_manager.connection_from_url(url) else: - conn = self.poolmanager.connection_from_url(url.lower()) + # Only scheme should be lower case + parsed = urlparse(url) + url = parsed.geturl() + conn = self.poolmanager.connection_from_url(url) return conn @@ -232,11 +274,11 @@ def request_url(self, request, proxies): :param proxies: A dictionary of schemes to proxy URLs. """ proxies = proxies or {} - scheme = urlparse(request.url).scheme.lower() + scheme = urlparse(request.url).scheme proxy = proxies.get(scheme) if proxy and scheme != 'https': - url, _ = urldefrag(request.url) + url = urldefragauth(request.url) else: url = request.path_url @@ -273,10 +315,6 @@ def proxy_headers(self, proxy): username, password = get_auth_from_url(proxy) if username and password: - # Proxy auth usernames and passwords will be urlencoded, we need - # to decode them. - username = unquote(username) - password = unquote(password) headers['Proxy-Authorization'] = _basic_auth_str(username, password) @@ -287,9 +325,12 @@ def send(self, request, stream=False, timeout=None, verify=True, cert=None, prox :param request: The :class:`PreparedRequest ` being sent. :param stream: (optional) Whether to stream the request content. - :param timeout: (optional) The timeout on the request. + :param timeout: (optional) How long to wait for the server to send + data before giving up, as a float, or a (`connect timeout, read + timeout `_) tuple. + :type timeout: float or tuple :param verify: (optional) Whether to verify SSL certificates. - :param vert: (optional) Any user-provided SSL certificate to be trusted. + :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. """ @@ -301,8 +342,16 @@ def send(self, request, stream=False, timeout=None, verify=True, cert=None, prox chunked = not (request.body is None or 'Content-Length' in request.headers) - if stream: - timeout = TimeoutSauce(connect=timeout) + if isinstance(timeout, tuple): + try: + connect, read = timeout + timeout = TimeoutSauce(connect=connect, read=read) + except ValueError as e: + # this may raise a string formatting error. + err = ("Invalid timeout {0}. Pass a (connect, read) " + "timeout tuple, or a single float to set " + "both timeouts to the same value".format(timeout)) + raise ValueError(err) else: timeout = TimeoutSauce(connect=timeout, read=timeout) @@ -327,48 +376,62 @@ def send(self, request, stream=False, timeout=None, verify=True, cert=None, prox conn = conn.proxy_pool low_conn = conn._get_conn(timeout=timeout) - low_conn.putrequest(request.method, url, skip_accept_encoding=True) - - for header, value in request.headers.items(): - low_conn.putheader(header, value) - - low_conn.endheaders() - for i in request.body: - low_conn.send(hex(len(i))[2:].encode('utf-8')) - low_conn.send(b'\r\n') - low_conn.send(i) - low_conn.send(b'\r\n') - low_conn.send(b'0\r\n\r\n') + try: + low_conn.putrequest(request.method, + url, + skip_accept_encoding=True) + + for header, value in request.headers.items(): + low_conn.putheader(header, value) + + low_conn.endheaders() + + for i in request.body: + low_conn.send(hex(len(i))[2:].encode('utf-8')) + low_conn.send(b'\r\n') + low_conn.send(i) + low_conn.send(b'\r\n') + low_conn.send(b'0\r\n\r\n') + + r = low_conn.getresponse() + resp = HTTPResponse.from_httplib( + r, + pool=conn, + connection=low_conn, + preload_content=False, + decode_content=False + ) + except: + # If we hit any problems here, clean up the connection. + # Then, reraise so that we can handle the actual exception. + low_conn.close() + raise + else: + # All is well, return the connection to the pool. + conn._put_conn(low_conn) + + except (ProtocolError, socket.error) as err: + raise ConnectionError(err, request=request) - r = low_conn.getresponse() - resp = HTTPResponse.from_httplib(r, - pool=conn, - connection=low_conn, - preload_content=False, - decode_content=False - ) + except MaxRetryError as e: + if isinstance(e.reason, ConnectTimeoutError): + raise ConnectTimeout(e, request=request) - except socket.error as sockerr: - raise ConnectionError(sockerr) + if isinstance(e.reason, ResponseError): + raise RetryError(e, request=request) - except MaxRetryError as e: - raise ConnectionError(e) + raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): - raise SSLError(e) - elif isinstance(e, TimeoutError): - raise Timeout(e) + raise SSLError(e, request=request) + elif isinstance(e, ReadTimeoutError): + raise ReadTimeout(e, request=request) else: raise - r = self.build_response(request, resp) - - if not stream: - r.content - - return r + return self.build_response(request, resp) diff --git a/botocore/vendored/requests/api.py b/botocore/vendored/requests/api.py index baf43dd613..1469b05c49 100644 --- a/botocore/vendored/requests/api.py +++ b/botocore/vendored/requests/api.py @@ -22,12 +22,17 @@ def request(method, url, **kwargs): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. + :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. + :param timeout: (optional) How long to wait for the server to send data + before giving up, as a float, or a (`connect timeout, read timeout + `_) tuple. + :type timeout: float or tuple :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. + :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. :param stream: (optional) if ``False``, the response content will be immediately downloaded. @@ -41,7 +46,12 @@ def request(method, url, **kwargs): """ session = sessions.Session() - return session.request(method=method, url=url, **kwargs) + response = session.request(method=method, url=url, **kwargs) + # By explicitly closing the session, we avoid leaving sockets open which + # can trigger a ResourceWarning in some cases, and look like a memory leak + # in others. + session.close() + return response def get(url, **kwargs): @@ -77,15 +87,16 @@ def head(url, **kwargs): return request('head', url, **kwargs) -def post(url, data=None, **kwargs): +def post(url, data=None, json=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. """ - return request('post', url, data=data, **kwargs) + return request('post', url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): diff --git a/botocore/vendored/requests/auth.py b/botocore/vendored/requests/auth.py index 30529e296e..b950181d9e 100644 --- a/botocore/vendored/requests/auth.py +++ b/botocore/vendored/requests/auth.py @@ -11,14 +11,13 @@ import re import time import hashlib -import logging from base64 import b64encode from .compat import urlparse, str -from .utils import parse_dict_header - -log = logging.getLogger(__name__) +from .cookies import extract_cookies_to_jar +from .utils import parse_dict_header, to_native_string +from .status_codes import codes CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' @@ -27,7 +26,11 @@ def _basic_auth_str(username, password): """Returns a Basic Auth string.""" - return 'Basic ' + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip().decode('latin1') + authstr = 'Basic ' + to_native_string( + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip() + ) + + return authstr class AuthBase(object): @@ -63,6 +66,8 @@ def __init__(self, username, password): self.last_nonce = '' self.nonce_count = 0 self.chal = {} + self.pos = None + self.num_401_calls = 1 def build_digest_header(self, method, url): @@ -77,7 +82,7 @@ def build_digest_header(self, method, url): else: _algorithm = algorithm.upper() # lambdas assume digest modules are imported at the top level - if _algorithm == 'MD5': + if _algorithm == 'MD5' or _algorithm == 'MD5-SESS': def md5_utf8(x): if isinstance(x, str): x = x.encode('utf-8') @@ -89,7 +94,7 @@ def sha_utf8(x): x = x.encode('utf-8') return hashlib.sha1(x).hexdigest() hash_utf8 = sha_utf8 - # XXX MD5-sess + KD = lambda s, d: hash_utf8("%s:%s" % (s, d)) if hash_utf8 is None: @@ -105,23 +110,28 @@ def sha_utf8(x): A1 = '%s:%s:%s' % (self.username, realm, self.password) A2 = '%s:%s' % (method, path) + HA1 = hash_utf8(A1) + HA2 = hash_utf8(A2) + + if nonce == self.last_nonce: + self.nonce_count += 1 + else: + self.nonce_count = 1 + ncvalue = '%08x' % self.nonce_count + s = str(self.nonce_count).encode('utf-8') + s += nonce.encode('utf-8') + s += time.ctime().encode('utf-8') + s += os.urandom(8) + + cnonce = (hashlib.sha1(s).hexdigest()[:16]) + noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2) + if _algorithm == 'MD5-SESS': + HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) + if qop is None: - respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2))) + respdig = KD(HA1, "%s:%s" % (nonce, HA2)) elif qop == 'auth' or 'auth' in qop.split(','): - if nonce == self.last_nonce: - self.nonce_count += 1 - else: - self.nonce_count = 1 - - ncvalue = '%08x' % self.nonce_count - s = str(self.nonce_count).encode('utf-8') - s += nonce.encode('utf-8') - s += time.ctime().encode('utf-8') - s += os.urandom(8) - - cnonce = (hashlib.sha1(s).hexdigest()[:16]) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, hash_utf8(A2)) - respdig = KD(hash_utf8(A1), noncebit) + respdig = KD(HA1, noncebit) else: # XXX handle auth-int. return None @@ -138,19 +148,28 @@ def sha_utf8(x): if entdig: base += ', digest="%s"' % entdig if qop: - base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) + base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce) return 'Digest %s' % (base) + def handle_redirect(self, r, **kwargs): + """Reset num_401_calls counter on redirects.""" + if r.is_redirect: + self.num_401_calls = 1 + def handle_401(self, r, **kwargs): """Takes the given response and tries digest-auth, if needed.""" + if self.pos is not None: + # Rewind the file position indicator of the body to where + # it was to resend the request. + r.request.body.seek(self.pos) num_401_calls = getattr(self, 'num_401_calls', 1) s_auth = r.headers.get('www-authenticate', '') if 'digest' in s_auth.lower() and num_401_calls < 2: - setattr(self, 'num_401_calls', num_401_calls + 1) + self.num_401_calls += 1 pat = re.compile(r'digest ', flags=re.IGNORECASE) self.chal = parse_dict_header(pat.sub('', s_auth, count=1)) @@ -159,7 +178,8 @@ def handle_401(self, r, **kwargs): r.content r.raw.release_conn() prep = r.request.copy() - prep.prepare_cookies(r.cookies) + extract_cookies_to_jar(prep._cookies, r.request, r.raw) + prep.prepare_cookies(prep._cookies) prep.headers['Authorization'] = self.build_digest_header( prep.method, prep.url) @@ -169,12 +189,21 @@ def handle_401(self, r, **kwargs): return _r - setattr(self, 'num_401_calls', 1) + self.num_401_calls = 1 return r def __call__(self, r): # If we have a saved nonce, skip the 401 if self.last_nonce: r.headers['Authorization'] = self.build_digest_header(r.method, r.url) + try: + self.pos = r.body.tell() + except AttributeError: + # In the case of HTTPDigestAuth being reused and the body of + # the previous request was a file-like object, pos has the + # file position of the previous body. Ensure it's set to + # None. + self.pos = None r.register_hook('response', self.handle_401) + r.register_hook('response', self.handle_redirect) return r diff --git a/botocore/vendored/requests/cacert.pem b/botocore/vendored/requests/cacert.pem index e908bb6a11..729fe15d40 100644 --- a/botocore/vendored/requests/cacert.pem +++ b/botocore/vendored/requests/cacert.pem @@ -1791,37 +1791,6 @@ QWOnryULwMWSyx6Yo1q6xTMPoJcB3X/ge9YGVM+h4k0460tQtcsm9MracEpqoeJ5 quGnM/b9Sh/22WA= -----END CERTIFICATE----- -# Issuer: CN=Wells Fargo Root Certificate Authority O=Wells Fargo OU=Wells Fargo Certification Authority -# Subject: CN=Wells Fargo Root Certificate Authority O=Wells Fargo OU=Wells Fargo Certification Authority -# Label: "Wells Fargo Root CA" -# Serial: 971282334 -# MD5 Fingerprint: 20:0b:4a:7a:88:a7:a9:42:86:8a:5f:74:56:7b:88:05 -# SHA1 Fingerprint: 93:e6:ab:22:03:03:b5:23:28:dc:da:56:9e:ba:e4:d1:d1:cc:fb:65 -# SHA256 Fingerprint: 03:45:8b:6a:be:ec:c2:14:95:3d:97:14:9a:f4:53:91:69:1d:e9:f9:cd:cc:26:47:86:3a:3d:67:c9:5c:24:3b ------BEGIN CERTIFICATE----- -MIID5TCCAs2gAwIBAgIEOeSXnjANBgkqhkiG9w0BAQUFADCBgjELMAkGA1UEBhMC -VVMxFDASBgNVBAoTC1dlbGxzIEZhcmdvMSwwKgYDVQQLEyNXZWxscyBGYXJnbyBD -ZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTEvMC0GA1UEAxMmV2VsbHMgRmFyZ28gUm9v -dCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwHhcNMDAxMDExMTY0MTI4WhcNMjEwMTE0 -MTY0MTI4WjCBgjELMAkGA1UEBhMCVVMxFDASBgNVBAoTC1dlbGxzIEZhcmdvMSww -KgYDVQQLEyNXZWxscyBGYXJnbyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTEvMC0G -A1UEAxMmV2VsbHMgRmFyZ28gUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEi -MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDVqDM7Jvk0/82bfuUER84A4n13 -5zHCLielTWi5MbqNQ1mXx3Oqfz1cQJ4F5aHiidlMuD+b+Qy0yGIZLEWukR5zcUHE -SxP9cMIlrCL1dQu3U+SlK93OvRw6esP3E48mVJwWa2uv+9iWsWCaSOAlIiR5NM4O -JgALTqv9i86C1y8IcGjBqAr5dE8Hq6T54oN+J3N0Prj5OEL8pahbSCOz6+MlsoCu -ltQKnMJ4msZoGK43YjdeUXWoWGPAUe5AeH6orxqg4bB4nVCMe+ez/I4jsNtlAHCE -AQgAFG5Uhpq6zPk3EPbg3oQtnaSFN9OH4xXQwReQfhkhahKpdv0SAulPIV4XAgMB -AAGjYTBfMA8GA1UdEwEB/wQFMAMBAf8wTAYDVR0gBEUwQzBBBgtghkgBhvt7hwcB -CzAyMDAGCCsGAQUFBwIBFiRodHRwOi8vd3d3LndlbGxzZmFyZ28uY29tL2NlcnRw -b2xpY3kwDQYJKoZIhvcNAQEFBQADggEBANIn3ZwKdyu7IvICtUpKkfnRLb7kuxpo -7w6kAOnu5+/u9vnldKTC2FJYxHT7zmu1Oyl5GFrvm+0fazbuSCUlFLZWohDo7qd/ -0D+j0MNdJu4HzMPBJCGHHt8qElNvQRbn7a6U+oxy+hNH8Dx+rn0ROhPs7fpvcmR7 -nX1/Jv16+yWt6j4pf0zjAFcysLPp7VMX2YuyFA4w6OXVE8Zkr8QA1dhYJPz1j+zx -x32l2w8n0cbyQIjmH/ZhqPRCyLk306m+LFZ4wnKbWV01QIroTmMatukgalHizqSQ -33ZwmVxwQ023tqcZZE6St8WRPH9IFmV7Fv3L/PvZ1dZPIWU7Sn9Ho/s= ------END CERTIFICATE----- - # Issuer: CN=Swisscom Root CA 1 O=Swisscom OU=Digital Certificate Services # Subject: CN=Swisscom Root CA 1 O=Swisscom OU=Digital Certificate Services # Label: "Swisscom Root CA 1" @@ -4964,3 +4933,94 @@ G5gPcFw0sorMwIUY6256s/daoQe/qUKS82Ail+QUoQebTnbAjn39pCXHR+3/H3Os zMOl6W8KjptlwlCFtaOgUxLMVYdh84GuEEZhvUQhuMI9dM9+JDX6HAcOmz0iyu8x L4ysEr3vQCj8KWefshNPZiTEUxnpHikV7+ZtsH8tZ/3zbBt1RqPlShfppNcL -----END CERTIFICATE----- + +# Issuer: CN=ACCVRAIZ1 O=ACCV OU=PKIACCV +# Subject: CN=ACCVRAIZ1 O=ACCV OU=PKIACCV +# Label: "ACCVRAIZ1" +# Serial: 6828503384748696800 +# MD5 Fingerprint: d0:a0:5a:ee:05:b6:09:94:21:a1:7d:f1:b2:29:82:02 +# SHA1 Fingerprint: 93:05:7a:88:15:c6:4f:ce:88:2f:fa:91:16:52:28:78:bc:53:64:17 +# SHA256 Fingerprint: 9a:6e:c0:12:e1:a7:da:9d:be:34:19:4d:47:8a:d7:c0:db:18:22:fb:07:1d:f1:29:81:49:6e:d1:04:38:41:13 +-----BEGIN CERTIFICATE----- +MIIH0zCCBbugAwIBAgIIXsO3pkN/pOAwDQYJKoZIhvcNAQEFBQAwQjESMBAGA1UE +AwwJQUNDVlJBSVoxMRAwDgYDVQQLDAdQS0lBQ0NWMQ0wCwYDVQQKDARBQ0NWMQsw +CQYDVQQGEwJFUzAeFw0xMTA1MDUwOTM3MzdaFw0zMDEyMzEwOTM3MzdaMEIxEjAQ +BgNVBAMMCUFDQ1ZSQUlaMTEQMA4GA1UECwwHUEtJQUNDVjENMAsGA1UECgwEQUND +VjELMAkGA1UEBhMCRVMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCb +qau/YUqXry+XZpp0X9DZlv3P4uRm7x8fRzPCRKPfmt4ftVTdFXxpNRFvu8gMjmoY +HtiP2Ra8EEg2XPBjs5BaXCQ316PWywlxufEBcoSwfdtNgM3802/J+Nq2DoLSRYWo +G2ioPej0RGy9ocLLA76MPhMAhN9KSMDjIgro6TenGEyxCQ0jVn8ETdkXhBilyNpA +lHPrzg5XPAOBOp0KoVdDaaxXbXmQeOW1tDvYvEyNKKGno6e6Ak4l0Squ7a4DIrhr +IA8wKFSVf+DuzgpmndFALW4ir50awQUZ0m/A8p/4e7MCQvtQqR0tkw8jq8bBD5L/ +0KIV9VMJcRz/RROE5iZe+OCIHAr8Fraocwa48GOEAqDGWuzndN9wrqODJerWx5eH +k6fGioozl2A3ED6XPm4pFdahD9GILBKfb6qkxkLrQaLjlUPTAYVtjrs78yM2x/47 +4KElB0iryYl0/wiPgL/AlmXz7uxLaL2diMMxs0Dx6M/2OLuc5NF/1OVYm3z61PMO +m3WR5LpSLhl+0fXNWhn8ugb2+1KoS5kE3fj5tItQo05iifCHJPqDQsGH+tUtKSpa +cXpkatcnYGMN285J9Y0fkIkyF/hzQ7jSWpOGYdbhdQrqeWZ2iE9x6wQl1gpaepPl +uUsXQA+xtrn13k/c4LOsOxFwYIRKQ26ZIMApcQrAZQIDAQABo4ICyzCCAscwfQYI +KwYBBQUHAQEEcTBvMEwGCCsGAQUFBzAChkBodHRwOi8vd3d3LmFjY3YuZXMvZmls +ZWFkbWluL0FyY2hpdm9zL2NlcnRpZmljYWRvcy9yYWl6YWNjdjEuY3J0MB8GCCsG +AQUFBzABhhNodHRwOi8vb2NzcC5hY2N2LmVzMB0GA1UdDgQWBBTSh7Tj3zcnk1X2 +VuqB5TbMjB4/vTAPBgNVHRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFNKHtOPfNyeT +VfZW6oHlNsyMHj+9MIIBcwYDVR0gBIIBajCCAWYwggFiBgRVHSAAMIIBWDCCASIG +CCsGAQUFBwICMIIBFB6CARAAQQB1AHQAbwByAGkAZABhAGQAIABkAGUAIABDAGUA +cgB0AGkAZgBpAGMAYQBjAGkA8wBuACAAUgBhAO0AegAgAGQAZQAgAGwAYQAgAEEA +QwBDAFYAIAAoAEEAZwBlAG4AYwBpAGEAIABkAGUAIABUAGUAYwBuAG8AbABvAGcA +7QBhACAAeQAgAEMAZQByAHQAaQBmAGkAYwBhAGMAaQDzAG4AIABFAGwAZQBjAHQA +cgDzAG4AaQBjAGEALAAgAEMASQBGACAAUQA0ADYAMAAxADEANQA2AEUAKQAuACAA +QwBQAFMAIABlAG4AIABoAHQAdABwADoALwAvAHcAdwB3AC4AYQBjAGMAdgAuAGUA +czAwBggrBgEFBQcCARYkaHR0cDovL3d3dy5hY2N2LmVzL2xlZ2lzbGFjaW9uX2Mu +aHRtMFUGA1UdHwROMEwwSqBIoEaGRGh0dHA6Ly93d3cuYWNjdi5lcy9maWxlYWRt +aW4vQXJjaGl2b3MvY2VydGlmaWNhZG9zL3JhaXphY2N2MV9kZXIuY3JsMA4GA1Ud +DwEB/wQEAwIBBjAXBgNVHREEEDAOgQxhY2N2QGFjY3YuZXMwDQYJKoZIhvcNAQEF +BQADggIBAJcxAp/n/UNnSEQU5CmH7UwoZtCPNdpNYbdKl02125DgBS4OxnnQ8pdp +D70ER9m+27Up2pvZrqmZ1dM8MJP1jaGo/AaNRPTKFpV8M9xii6g3+CfYCS0b78gU +JyCpZET/LtZ1qmxNYEAZSUNUY9rizLpm5U9EelvZaoErQNV/+QEnWCzI7UiRfD+m +AM/EKXMRNt6GGT6d7hmKG9Ww7Y49nCrADdg9ZuM8Db3VlFzi4qc1GwQA9j9ajepD +vV+JHanBsMyZ4k0ACtrJJ1vnE5Bc5PUzolVt3OAJTS+xJlsndQAJxGJ3KQhfnlms +tn6tn1QwIgPBHnFk/vk4CpYY3QIUrCPLBhwepH2NDd4nQeit2hW3sCPdK6jT2iWH +7ehVRE2I9DZ+hJp4rPcOVkkO1jMl1oRQQmwgEh0q1b688nCBpHBgvgW1m54ERL5h +I6zppSSMEYCUWqKiuUnSwdzRp+0xESyeGabu4VXhwOrPDYTkF7eifKXeVSUG7szA +h1xA2syVP1XgNce4hL60Xc16gwFy7ofmXx2utYXGJt/mwZrpHgJHnyqobalbz+xF +d3+YJ5oyXSrjhO7FmGYvliAd3djDJ9ew+f7Zfc3Qn48LFFhRny+Lwzgt3uiP1o2H +pPVWQxaZLPSkVrQ0uGE3ycJYgBugl6H8WY3pEfbRD0tVNEYqi4Y7 +-----END CERTIFICATE----- + +# Issuer: CN=TWCA Global Root CA O=TAIWAN-CA OU=Root CA +# Subject: CN=TWCA Global Root CA O=TAIWAN-CA OU=Root CA +# Label: "TWCA Global Root CA" +# Serial: 3262 +# MD5 Fingerprint: f9:03:7e:cf:e6:9e:3c:73:7a:2a:90:07:69:ff:2b:96 +# SHA1 Fingerprint: 9c:bb:48:53:f6:a4:f6:d3:52:a4:e8:32:52:55:60:13:f5:ad:af:65 +# SHA256 Fingerprint: 59:76:90:07:f7:68:5d:0f:cd:50:87:2f:9f:95:d5:75:5a:5b:2b:45:7d:81:f3:69:2b:61:0a:98:67:2f:0e:1b +-----BEGIN CERTIFICATE----- +MIIFQTCCAymgAwIBAgICDL4wDQYJKoZIhvcNAQELBQAwUTELMAkGA1UEBhMCVFcx +EjAQBgNVBAoTCVRBSVdBTi1DQTEQMA4GA1UECxMHUm9vdCBDQTEcMBoGA1UEAxMT +VFdDQSBHbG9iYWwgUm9vdCBDQTAeFw0xMjA2MjcwNjI4MzNaFw0zMDEyMzExNTU5 +NTlaMFExCzAJBgNVBAYTAlRXMRIwEAYDVQQKEwlUQUlXQU4tQ0ExEDAOBgNVBAsT +B1Jvb3QgQ0ExHDAaBgNVBAMTE1RXQ0EgR2xvYmFsIFJvb3QgQ0EwggIiMA0GCSqG +SIb3DQEBAQUAA4ICDwAwggIKAoICAQCwBdvI64zEbooh745NnHEKH1Jw7W2CnJfF +10xORUnLQEK1EjRsGcJ0pDFfhQKX7EMzClPSnIyOt7h52yvVavKOZsTuKwEHktSz +0ALfUPZVr2YOy+BHYC8rMjk1Ujoog/h7FsYYuGLWRyWRzvAZEk2tY/XTP3VfKfCh +MBwqoJimFb3u/Rk28OKRQ4/6ytYQJ0lM793B8YVwm8rqqFpD/G2Gb3PpN0Wp8DbH +zIh1HrtsBv+baz4X7GGqcXzGHaL3SekVtTzWoWH1EfcFbx39Eb7QMAfCKbAJTibc +46KokWofwpFFiFzlmLhxpRUZyXx1EcxwdE8tmx2RRP1WKKD+u4ZqyPpcC1jcxkt2 +yKsi2XMPpfRaAok/T54igu6idFMqPVMnaR1sjjIsZAAmY2E2TqNGtz99sy2sbZCi +laLOz9qC5wc0GZbpuCGqKX6mOL6OKUohZnkfs8O1CWfe1tQHRvMq2uYiN2DLgbYP +oA/pyJV/v1WRBXrPPRXAb94JlAGD1zQbzECl8LibZ9WYkTunhHiVJqRaCPgrdLQA +BDzfuBSO6N+pjWxnkjMdwLfS7JLIvgm/LCkFbwJrnu+8vyq8W8BQj0FwcYeyTbcE +qYSjMq+u7msXi7Kx/mzhkIyIqJdIzshNy/MGz19qCkKxHh53L46g5pIOBvwFItIm +4TFRfTLcDwIDAQABoyMwITAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB +/zANBgkqhkiG9w0BAQsFAAOCAgEAXzSBdu+WHdXltdkCY4QWwa6gcFGn90xHNcgL +1yg9iXHZqjNB6hQbbCEAwGxCGX6faVsgQt+i0trEfJdLjbDorMjupWkEmQqSpqsn +LhpNgb+E1HAerUf+/UqdM+DyucRFCCEK2mlpc3INvjT+lIutwx4116KD7+U4x6WF +H6vPNOw/KP4M8VeGTslV9xzU2KV9Bnpv1d8Q34FOIWWxtuEXeZVFBs5fzNxGiWNo +RI2T9GRwoD2dKAXDOXC4Ynsg/eTb6QihuJ49CcdP+yz4k3ZB3lLg4VfSnQO8d57+ +nile98FRYB/e2guyLXW3Q0iT5/Z5xoRdgFlglPx4mI88k1HtQJAH32RjJMtOcQWh +15QaiDLxInQirqWm2BJpTGCjAu4r7NRjkgtevi92a6O2JryPA9gK8kxkRr05YuWW +6zRjESjMlfGt7+/cgFhI6Uu46mWs6fyAtbXIRfmswZ/ZuepiiI7E8UuDEq3mi4TW +nsLrgxifarsbJGAzcMzs9zLzXNl5fe+epP7JI8Mk7hWSsT2RTyaGvWZzJBPqpK5j +wa19hAM8EHiGG3njxPPyBJUgriOCxLM6AGK/5jYk4Ve6xx6QddVfP5VhK8E7zeWz +aGHQRiapIVJpLesux+t3zqY6tQMzT3bR51xUAV3LePTJDL/PEo4XLSNolOer/qmy +KwbQBM0= +-----END CERTIFICATE----- diff --git a/botocore/vendored/requests/certs.py b/botocore/vendored/requests/certs.py index bc00826191..07e6475070 100644 --- a/botocore/vendored/requests/certs.py +++ b/botocore/vendored/requests/certs.py @@ -11,14 +11,15 @@ environment, you can change the definition of where() to return a separately packaged CA bundle. """ - import os.path - -def where(): - """Return the preferred certificate bundle.""" - # vendored bundle inside Requests - return os.path.join(os.path.dirname(__file__), 'cacert.pem') +try: + from certifi import where +except ImportError: + def where(): + """Return the preferred certificate bundle.""" + # vendored bundle inside Requests + return os.path.join(os.path.dirname(__file__), 'cacert.pem') if __name__ == '__main__': print(where()) diff --git a/botocore/vendored/requests/compat.py b/botocore/vendored/requests/compat.py index 0d61a572df..c07726ee45 100644 --- a/botocore/vendored/requests/compat.py +++ b/botocore/vendored/requests/compat.py @@ -4,7 +4,7 @@ pythoncompat """ -from .packages import charade as chardet +from .packages import chardet import sys @@ -75,7 +75,9 @@ try: import simplejson as json -except ImportError: +except (ImportError, SyntaxError): + # simplejson does not support Python 3.2, it throws a SyntaxError + # because of u'...' Unicode literals. import json # --------- @@ -90,7 +92,6 @@ from Cookie import Morsel from StringIO import StringIO from .packages.urllib3.packages.ordered_dict import OrderedDict - from httplib import IncompleteRead builtin_str = str bytes = str @@ -106,7 +107,6 @@ from http.cookies import Morsel from io import StringIO from collections import OrderedDict - from http.client import IncompleteRead builtin_str = str str = str diff --git a/botocore/vendored/requests/cookies.py b/botocore/vendored/requests/cookies.py index ea56c065a5..831c49c6d2 100644 --- a/botocore/vendored/requests/cookies.py +++ b/botocore/vendored/requests/cookies.py @@ -8,7 +8,7 @@ import time import collections -from .compat import cookielib, urlparse, Morsel +from .compat import cookielib, urlparse, urlunparse, Morsel try: import threading @@ -45,7 +45,18 @@ def get_origin_req_host(self): return self.get_host() def get_full_url(self): - return self._r.url + # Only return the response's URL if the user hadn't set the Host + # header + if not self._r.headers.get('Host'): + return self._r.url + # If they did set it, retrieve it and reconstruct the expected domain + host = self._r.headers['Host'] + parsed = urlparse(self._r.url) + # Reconstruct the URL as we expect it + return urlunparse([ + parsed.scheme, host, parsed.path, parsed.params, parsed.query, + parsed.fragment + ]) def is_unverifiable(self): return True @@ -187,30 +198,39 @@ def set(self, name, value, **kwargs): self.set_cookie(c) return c + def iterkeys(self): + """Dict-like iterkeys() that returns an iterator of names of cookies from the jar. + See itervalues() and iteritems().""" + for cookie in iter(self): + yield cookie.name + def keys(self): """Dict-like keys() that returns a list of names of cookies from the jar. See values() and items().""" - keys = [] + return list(self.iterkeys()) + + def itervalues(self): + """Dict-like itervalues() that returns an iterator of values of cookies from the jar. + See iterkeys() and iteritems().""" for cookie in iter(self): - keys.append(cookie.name) - return keys + yield cookie.value def values(self): """Dict-like values() that returns a list of values of cookies from the jar. See keys() and items().""" - values = [] + return list(self.itervalues()) + + def iteritems(self): + """Dict-like iteritems() that returns an iterator of name-value tuples from the jar. + See iterkeys() and itervalues().""" for cookie in iter(self): - values.append(cookie.value) - return values + yield cookie.name, cookie.value def items(self): """Dict-like items() that returns a list of name-value tuples from the jar. See keys() and values(). Allows client-code to call "dict(RequestsCookieJar) and get a vanilla python dict of key value pairs.""" - items = [] - for cookie in iter(self): - items.append((cookie.name, cookie.value)) - return items + return list(self.iteritems()) def list_domains(self): """Utility method to list all the domains in the jar.""" @@ -267,7 +287,7 @@ def __delitem__(self, name): remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): - if cookie.value.startswith('"') and cookie.value.endswith('"'): + if hasattr(cookie.value, 'startswith') and cookie.value.startswith('"') and cookie.value.endswith('"'): cookie.value = cookie.value.replace('\\"', '') return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs) @@ -367,29 +387,29 @@ def create_cookie(name, value, **kwargs): def morsel_to_cookie(morsel): """Convert a Morsel object into a Cookie containing the one k/v pair.""" + expires = None - if morsel["max-age"]: - expires = time.time() + morsel["max-age"] + if morsel['max-age']: + expires = time.time() + morsel['max-age'] elif morsel['expires']: - expires = morsel['expires'] - if type(expires) == type(""): - time_template = "%a, %d-%b-%Y %H:%M:%S GMT" - expires = time.mktime(time.strptime(expires, time_template)) - c = create_cookie( - name=morsel.key, - value=morsel.value, - version=morsel['version'] or 0, - port=None, - domain=morsel['domain'], - path=morsel['path'], - secure=bool(morsel['secure']), - expires=expires, - discard=False, + time_template = '%a, %d-%b-%Y %H:%M:%S GMT' + expires = time.mktime( + time.strptime(morsel['expires'], time_template)) - time.timezone + return create_cookie( comment=morsel['comment'], comment_url=bool(morsel['comment']), + discard=False, + domain=morsel['domain'], + expires=expires, + name=morsel.key, + path=morsel['path'], + port=None, rest={'HttpOnly': morsel['httponly']}, - rfc2109=False,) - return c + rfc2109=False, + secure=bool(morsel['secure']), + value=morsel.value, + version=morsel['version'] or 0, + ) def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True): @@ -410,3 +430,25 @@ def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True): cookiejar.set_cookie(create_cookie(name, cookie_dict[name])) return cookiejar + + +def merge_cookies(cookiejar, cookies): + """Add cookies to cookiejar and returns a merged CookieJar. + + :param cookiejar: CookieJar object to add the cookies to. + :param cookies: Dictionary or CookieJar object to be added. + """ + if not isinstance(cookiejar, cookielib.CookieJar): + raise ValueError('You can only merge into CookieJar') + + if isinstance(cookies, dict): + cookiejar = cookiejar_from_dict( + cookies, cookiejar=cookiejar, overwrite=False) + elif isinstance(cookies, cookielib.CookieJar): + try: + cookiejar.update(cookies) + except AttributeError: + for cookie_in_jar in cookies: + cookiejar.set_cookie(cookie_in_jar) + + return cookiejar diff --git a/botocore/vendored/requests/exceptions.py b/botocore/vendored/requests/exceptions.py index bc42b5fff0..89135a802e 100644 --- a/botocore/vendored/requests/exceptions.py +++ b/botocore/vendored/requests/exceptions.py @@ -7,21 +7,29 @@ This module contains the set of Requests' exceptions. """ +from .packages.urllib3.exceptions import HTTPError as BaseHTTPError class RequestException(IOError): """There was an ambiguous exception that occurred while handling your request.""" + def __init__(self, *args, **kwargs): + """ + Initialize RequestException with `request` and `response` objects. + """ + response = kwargs.pop('response', None) + self.response = response + self.request = kwargs.pop('request', None) + if (response is not None and not self.request and + hasattr(response, 'request')): + self.request = self.response.request + super(RequestException, self).__init__(*args, **kwargs) + class HTTPError(RequestException): """An HTTP error occurred.""" - def __init__(self, *args, **kwargs): - """ Initializes HTTPError with optional `response` object. """ - self.response = kwargs.pop('response', None) - super(HTTPError, self).__init__(*args, **kwargs) - class ConnectionError(RequestException): """A Connection error occurred.""" @@ -36,7 +44,23 @@ class SSLError(ConnectionError): class Timeout(RequestException): - """The request timed out.""" + """The request timed out. + + Catching this error will catch both + :exc:`~requests.exceptions.ConnectTimeout` and + :exc:`~requests.exceptions.ReadTimeout` errors. + """ + + +class ConnectTimeout(ConnectionError, Timeout): + """The request timed out while trying to connect to the remote server. + + Requests that produced this error are safe to retry. + """ + + +class ReadTimeout(Timeout): + """The server did not send any data in the allotted amount of time.""" class URLRequired(RequestException): @@ -61,3 +85,15 @@ class InvalidURL(RequestException, ValueError): class ChunkedEncodingError(RequestException): """The server declared chunked encoding but sent an invalid chunk.""" + + +class ContentDecodingError(RequestException, BaseHTTPError): + """Failed to decode response content""" + + +class StreamConsumedError(RequestException, TypeError): + """The content for this response was already consumed""" + + +class RetryError(RequestException): + """Custom retries logic failed""" diff --git a/botocore/vendored/requests/models.py b/botocore/vendored/requests/models.py index 5647bf3d40..b728c84e41 100644 --- a/botocore/vendored/requests/models.py +++ b/botocore/vendored/requests/models.py @@ -8,7 +8,6 @@ """ import collections -import logging import datetime from io import BytesIO, UnsupportedOperation @@ -20,21 +19,34 @@ from .packages.urllib3.fields import RequestField from .packages.urllib3.filepost import encode_multipart_formdata from .packages.urllib3.util import parse_url +from .packages.urllib3.exceptions import ( + DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) from .exceptions import ( - HTTPError, RequestException, MissingSchema, InvalidURL, - ChunkedEncodingError) + HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, + ContentDecodingError, ConnectionError, StreamConsumedError) from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, iter_slices, guess_json_utf, super_len, to_native_string) from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, - is_py2, chardet, json, builtin_str, basestring, IncompleteRead) - + is_py2, chardet, json, builtin_str, basestring) +from .status_codes import codes + +#: The set of HTTP status codes that indicate an automatically +#: processable redirect. +REDIRECT_STATI = ( + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 + codes.temporary_redirect, # 307 + codes.permanent_redirect, # 308 +) +DEFAULT_REDIRECT_LIMIT = 30 CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 -log = logging.getLogger(__name__) +json_dumps = json.dumps class RequestEncodingMixin(object): @@ -179,7 +191,8 @@ class Request(RequestHooksMixin): :param url: URL to send. :param headers: dictionary of headers to send. :param files: dictionary of {filename: fileobject} files to multipart upload. - :param data: the body to attach the request. If a dictionary is provided, form-encoding will take place. + :param data: the body to attach to the request. If a dictionary is provided, form-encoding will take place. + :param json: json for the body to attach to the request (if data is not specified). :param params: dictionary of URL parameters to append to the URL. :param auth: Auth handler or (user, pass) tuple. :param cookies: dictionary or CookieJar of cookies to attach to this request. @@ -202,7 +215,8 @@ def __init__(self, params=None, auth=None, cookies=None, - hooks=None): + hooks=None, + json=None): # Default empty dicts for dict params. data = [] if data is None else data @@ -220,6 +234,7 @@ def __init__(self, self.headers = headers self.files = files self.data = data + self.json = json self.params = params self.auth = auth self.cookies = cookies @@ -236,6 +251,7 @@ def prepare(self): headers=self.headers, files=self.files, data=self.data, + json=self.json, params=self.params, auth=self.auth, cookies=self.cookies, @@ -270,20 +286,24 @@ def __init__(self): self.url = None #: dictionary of HTTP headers. self.headers = None + # The `CookieJar` used to create the Cookie header will be stored here + # after prepare_cookies is called + self._cookies = None #: request body to send to the server. self.body = None #: dictionary of callback hooks, for internal usage. self.hooks = default_hooks() def prepare(self, method=None, url=None, headers=None, files=None, - data=None, params=None, auth=None, cookies=None, hooks=None): + data=None, params=None, auth=None, cookies=None, hooks=None, + json=None): """Prepares the entire request with the given parameters.""" self.prepare_method(method) self.prepare_url(url, params) self.prepare_headers(headers) self.prepare_cookies(cookies) - self.prepare_body(data, files) + self.prepare_body(data, files, json) self.prepare_auth(auth, url) # Note that prepare_auth must be last to enable authentication schemes # such as OAuth to work on a fully prepared request. @@ -298,7 +318,8 @@ def copy(self): p = PreparedRequest() p.method = self.method p.url = self.url - p.headers = self.headers.copy() + p.headers = self.headers.copy() if self.headers is not None else None + p._cookies = self._cookies.copy() if self._cookies is not None else None p.body = self.body p.hooks = self.hooks return p @@ -312,19 +333,31 @@ def prepare_method(self, method): def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. - try: - url = unicode(url) - except NameError: - # We're on Python 3. - url = str(url) - except UnicodeDecodeError: - pass + #: We're unable to blindy call unicode/str functions + #: as this will include the bytestring indicator (b'') + #: on python 3.x. + #: https://github.com/kennethreitz/requests/pull/2238 + if isinstance(url, bytes): + url = url.decode('utf8') + else: + url = unicode(url) if is_py2 else str(url) + + # Don't do any URL preparation for non-HTTP schemes like `mailto`, + # `data` etc to work around exceptions from `url_parse`, which + # handles RFC 3986 only. + if ':' in url and not url.lower().startswith('http'): + self.url = url + return # Support for unicode domain names and paths. - scheme, auth, host, port, path, query, fragment = parse_url(url) + try: + scheme, auth, host, port, path, query, fragment = parse_url(url) + except LocationParseError as e: + raise InvalidURL(*e.args) if not scheme: - raise MissingSchema("Invalid URL %r: No schema supplied" % url) + raise MissingSchema("Invalid URL {0!r}: No schema supplied. " + "Perhaps you meant http://{0}?".format(url)) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) @@ -377,7 +410,7 @@ def prepare_headers(self, headers): else: self.headers = CaseInsensitiveDict() - def prepare_body(self, data, files): + def prepare_body(self, data, files, json=None): """Prepares the given HTTP body data.""" # Check if file, fo, generator, iterator. @@ -388,11 +421,13 @@ def prepare_body(self, data, files): content_type = None length = None + if json is not None: + content_type = 'application/json' + body = json_dumps(json) + is_stream = all([ hasattr(data, '__iter__'), - not isinstance(data, basestring), - not isinstance(data, list), - not isinstance(data, dict) + not isinstance(data, (basestring, list, tuple, dict)) ]) try: @@ -407,7 +442,7 @@ def prepare_body(self, data, files): raise NotImplementedError('Streamed bodies and files are mutually exclusive.') if length is not None: - self.headers['Content-Length'] = str(length) + self.headers['Content-Length'] = builtin_str(length) else: self.headers['Transfer-Encoding'] = 'chunked' else: @@ -415,9 +450,9 @@ def prepare_body(self, data, files): if files: (body, content_type) = self._encode_files(files, data) else: - if data: + if data and json is None: body = self._encode_params(data) - if isinstance(data, str) or isinstance(data, builtin_str) or hasattr(data, 'read'): + if isinstance(data, basestring) or hasattr(data, 'read'): content_type = None else: content_type = 'application/x-www-form-urlencoded' @@ -425,7 +460,7 @@ def prepare_body(self, data, files): self.prepare_content_length(body) # Add content-type if it wasn't explicitly provided. - if (content_type) and (not 'content-type' in self.headers): + if content_type and ('content-type' not in self.headers): self.headers['Content-Type'] = content_type self.body = body @@ -433,13 +468,13 @@ def prepare_body(self, data, files): def prepare_content_length(self, body): if hasattr(body, 'seek') and hasattr(body, 'tell'): body.seek(0, 2) - self.headers['Content-Length'] = str(body.tell()) + self.headers['Content-Length'] = builtin_str(body.tell()) body.seek(0, 0) elif body is not None: l = super_len(body) if l: - self.headers['Content-Length'] = str(l) - elif self.method not in ('GET', 'HEAD'): + self.headers['Content-Length'] = builtin_str(l) + elif (self.method not in ('GET', 'HEAD')) and (self.headers.get('Content-Length') is None): self.headers['Content-Length'] = '0' def prepare_auth(self, auth, url=''): @@ -468,14 +503,13 @@ def prepare_cookies(self, cookies): """Prepares the given HTTP cookie data.""" if isinstance(cookies, cookielib.CookieJar): - cookies = cookies + self._cookies = cookies else: - cookies = cookiejar_from_dict(cookies) + self._cookies = cookiejar_from_dict(cookies) - if 'cookie' not in self.headers: - cookie_header = get_cookie_header(cookies, self) - if cookie_header is not None: - self.headers['Cookie'] = cookie_header + cookie_header = get_cookie_header(self._cookies, self) + if cookie_header is not None: + self.headers['Cookie'] = cookie_header def prepare_hooks(self, hooks): """Prepares the given hooks.""" @@ -488,13 +522,26 @@ class Response(object): server's response to an HTTP request. """ + __attrs__ = [ + '_content', + 'status_code', + 'headers', + 'url', + 'history', + 'encoding', + 'reason', + 'cookies', + 'elapsed', + 'request', + ] + def __init__(self): super(Response, self).__init__() self._content = False self._content_consumed = False - #: Integer Code of responded HTTP Status. + #: Integer Code of responded HTTP Status, e.g. 404 or 200. self.status_code = None #: Case-insensitive Dictionary of Response Headers. @@ -503,7 +550,7 @@ def __init__(self): self.headers = CaseInsensitiveDict() #: File-like object representation of response (for advanced usage). - #: Requires that ``stream=True` on the request. + #: Use of ``raw`` requires that ``stream=True`` be set on the request. # This requirement does not apply for use internally to Requests. self.raw = None @@ -518,6 +565,7 @@ def __init__(self): #: up here. The list is sorted from the oldest to the most recent request. self.history = [] + #: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK". self.reason = None #: A CookieJar of Cookies the server sent back. @@ -527,6 +575,29 @@ def __init__(self): #: and the arrival of the response (as a timedelta) self.elapsed = datetime.timedelta(0) + #: The :class:`PreparedRequest ` object to which this + #: is a response. + self.request = None + + def __getstate__(self): + # Consume everything; accessing the content attribute makes + # sure the content has been fully read. + if not self._content_consumed: + self.content + + return dict( + (attr, getattr(self, attr, None)) + for attr in self.__attrs__ + ) + + def __setstate__(self, state): + for name, value in state.items(): + setattr(self, name, value) + + # pickled objects do not have .raw + setattr(self, '_content_consumed', True) + setattr(self, 'raw', None) + def __repr__(self): return '' % (self.status_code) @@ -546,14 +617,25 @@ def __iter__(self): def ok(self): try: self.raise_for_status() - except RequestException: + except HTTPError: return False return True + @property + def is_redirect(self): + """True if this Response is a well-formed HTTP redirect that could have + been processed automatically (by :meth:`Session.resolve_redirects`). + """ + return ('location' in self.headers and self.status_code in REDIRECT_STATI) + + @property + def is_permanent_redirect(self): + """True if this Response one of the permanant versions of redirect""" + return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect)) + @property def apparent_encoding(self): - """The apparent encoding, provided by the lovely Charade library - (Thanks, Ian!).""" + """The apparent encoding, provided by the chardet library""" return chardet.detect(self.content)['encoding'] def iter_content(self, chunk_size=1, decode_unicode=False): @@ -562,20 +644,22 @@ def iter_content(self, chunk_size=1, decode_unicode=False): large responses. The chunk size is the number of bytes it should read into memory. This is not necessarily the length of each item returned as decoding can take place. - """ - if self._content_consumed: - # simulate reading small chunks of the content - return iter_slices(self._content, chunk_size) + If decode_unicode is True, content will be decoded using the best + available encoding based on the response. + """ def generate(): try: # Special case for urllib3. try: - for chunk in self.raw.stream(chunk_size, - decode_content=True): + for chunk in self.raw.stream(chunk_size, decode_content=True): yield chunk - except IncompleteRead as e: + except ProtocolError as e: raise ChunkedEncodingError(e) + except DecodeError as e: + raise ContentDecodingError(e) + except ReadTimeoutError as e: + raise ConnectionError(e) except AttributeError: # Standard file-like object. while True: @@ -586,14 +670,21 @@ def generate(): self._content_consumed = True - gen = generate() + if self._content_consumed and isinstance(self._content, bool): + raise StreamConsumedError() + # simulate reading small chunks of the content + reused_chunks = iter_slices(self._content, chunk_size) + + stream_chunks = generate() + + chunks = reused_chunks if self._content_consumed else stream_chunks if decode_unicode: - gen = stream_decode_response_unicode(gen, self) + chunks = stream_decode_response_unicode(chunks, self) - return gen + return chunks - def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None): + def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None, delimiter=None): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. @@ -601,12 +692,15 @@ def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None): pending = None - for chunk in self.iter_content(chunk_size=chunk_size, - decode_unicode=decode_unicode): + for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode): if pending is not None: chunk = pending + chunk - lines = chunk.splitlines() + + if delimiter: + lines = chunk.split(delimiter) + else: + lines = chunk.splitlines() if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]: pending = lines.pop() @@ -647,8 +741,13 @@ def content(self): def text(self): """Content of the response, in unicode. - if Response.encoding is None and chardet module is available, encoding - will be guessed. + If Response.encoding is None, encoding will be guessed using + ``chardet``. + + The encoding of the response content is determined based solely on HTTP + headers, following RFC 2616 to the letter. If you can take advantage of + non-HTTP knowledge to make a better guess at the encoding, you should + set ``r.encoding`` appropriately before accessing this property. """ # Try charset from content-type @@ -689,7 +788,14 @@ def json(self, **kwargs): # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: - return json.loads(self.content.decode(encoding), **kwargs) + try: + return json.loads(self.content.decode(encoding), **kwargs) + except UnicodeDecodeError: + # Wrong UTF codec detected; usually because it's not UTF-8 + # but some other 8-bit codec. This is an RFC violation, + # and the server didn't bother to tell us what codec *was* + # used. + pass return json.loads(self.text, **kwargs) @property @@ -725,8 +831,8 @@ def raise_for_status(self): raise HTTPError(http_error_msg, response=self) def close(self): - """Closes the underlying file descriptor and releases the connection - back to the pool. + """Releases the connection back to the pool. Once this method has been + called the underlying ``raw`` object must not be accessed again. *Note: Should not normally need to be called explicitly.* """ diff --git a/botocore/vendored/requests/packages/charade/__main__.py b/botocore/vendored/requests/packages/charade/__main__.py deleted file mode 100644 index c0d587fa9e..0000000000 --- a/botocore/vendored/requests/packages/charade/__main__.py +++ /dev/null @@ -1,7 +0,0 @@ -''' -support ';python -m charade [file2] ...' package execution syntax (2.7+) -''' - -from charade import charade_cli - -charade_cli() diff --git a/botocore/vendored/requests/packages/charade/__init__.py b/botocore/vendored/requests/packages/chardet/__init__.py similarity index 55% rename from botocore/vendored/requests/packages/charade/__init__.py rename to botocore/vendored/requests/packages/chardet/__init__.py index 26362e9739..82c2a48d29 100644 --- a/botocore/vendored/requests/packages/charade/__init__.py +++ b/botocore/vendored/requests/packages/chardet/__init__.py @@ -1,66 +1,32 @@ -######################## BEGIN LICENSE BLOCK ######################## -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -__version__ = "1.0.3" -from sys import version_info - - -def detect(aBuf): - if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or - (version_info >= (3, 0) and not isinstance(aBuf, bytes))): - raise ValueError('Expected a bytes object, not a unicode object') - - from . import universaldetector - u = universaldetector.UniversalDetector() - u.reset() - u.feed(aBuf) - u.close() - return u.result - -def _description_of(path): - """Return a string describing the probable encoding of a file.""" - from charade.universaldetector import UniversalDetector - - u = UniversalDetector() - for line in open(path, 'rb'): - u.feed(line) - u.close() - result = u.result - if result['encoding']: - return '%s: %s with confidence %s' % (path, - result['encoding'], - result['confidence']) - else: - return '%s: no result' % path - - -def charade_cli(): - """ - Script which takes one or more file paths and reports on their detected - encodings - - Example:: - - % chardetect.py somefile someotherfile - somefile: windows-1252 with confidence 0.5 - someotherfile: ascii with confidence 1.0 - - """ - from sys import argv - for path in argv[1:]: - print(_description_of(path)) - \ No newline at end of file +######################## BEGIN LICENSE BLOCK ######################## +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +__version__ = "2.3.0" +from sys import version_info + + +def detect(aBuf): + if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or + (version_info >= (3, 0) and not isinstance(aBuf, bytes))): + raise ValueError('Expected a bytes object, not a unicode object') + + from . import universaldetector + u = universaldetector.UniversalDetector() + u.reset() + u.feed(aBuf) + u.close() + return u.result diff --git a/botocore/vendored/requests/packages/charade/big5freq.py b/botocore/vendored/requests/packages/chardet/big5freq.py similarity index 100% rename from botocore/vendored/requests/packages/charade/big5freq.py rename to botocore/vendored/requests/packages/chardet/big5freq.py diff --git a/botocore/vendored/requests/packages/charade/big5prober.py b/botocore/vendored/requests/packages/chardet/big5prober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/big5prober.py rename to botocore/vendored/requests/packages/chardet/big5prober.py index 7382f7c5d4..becce81e5e 100644 --- a/botocore/vendored/requests/packages/charade/big5prober.py +++ b/botocore/vendored/requests/packages/chardet/big5prober.py @@ -1,42 +1,42 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import Big5DistributionAnalysis -from .mbcssm import Big5SMModel - - -class Big5Prober(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(Big5SMModel) - self._mDistributionAnalyzer = Big5DistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "Big5" +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import Big5DistributionAnalysis +from .mbcssm import Big5SMModel + + +class Big5Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(Big5SMModel) + self._mDistributionAnalyzer = Big5DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "Big5" diff --git a/botocore/vendored/requests/packages/chardet/chardetect.py b/botocore/vendored/requests/packages/chardet/chardetect.py new file mode 100644 index 0000000000..ffe892f25d --- /dev/null +++ b/botocore/vendored/requests/packages/chardet/chardetect.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +""" +Script which takes one or more file paths and reports on their detected +encodings + +Example:: + + % chardetect somefile someotherfile + somefile: windows-1252 with confidence 0.5 + someotherfile: ascii with confidence 1.0 + +If no paths are provided, it takes its input from stdin. + +""" + +from __future__ import absolute_import, print_function, unicode_literals + +import argparse +import sys +from io import open + +from chardet import __version__ +from chardet.universaldetector import UniversalDetector + + +def description_of(lines, name='stdin'): + """ + Return a string describing the probable encoding of a file or + list of strings. + + :param lines: The lines to get the encoding of. + :type lines: Iterable of bytes + :param name: Name of file or collection of lines + :type name: str + """ + u = UniversalDetector() + for line in lines: + u.feed(line) + u.close() + result = u.result + if result['encoding']: + return '{0}: {1} with confidence {2}'.format(name, result['encoding'], + result['confidence']) + else: + return '{0}: no result'.format(name) + + +def main(argv=None): + ''' + Handles command line arguments and gets things started. + + :param argv: List of arguments, as if specified on the command-line. + If None, ``sys.argv[1:]`` is used instead. + :type argv: list of str + ''' + # Get command line arguments + parser = argparse.ArgumentParser( + description="Takes one or more file paths and reports their detected \ + encodings", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + conflict_handler='resolve') + parser.add_argument('input', + help='File whose encoding we would like to determine.', + type=argparse.FileType('rb'), nargs='*', + default=[sys.stdin]) + parser.add_argument('--version', action='version', + version='%(prog)s {0}'.format(__version__)) + args = parser.parse_args(argv) + + for f in args.input: + if f.isatty(): + print("You are running chardetect interactively. Press " + + "CTRL-D twice at the start of a blank line to signal the " + + "end of your input. If you want help, run chardetect " + + "--help\n", file=sys.stderr) + print(description_of(f, f.name)) + + +if __name__ == '__main__': + main() diff --git a/botocore/vendored/requests/packages/charade/chardistribution.py b/botocore/vendored/requests/packages/chardet/chardistribution.py similarity index 97% rename from botocore/vendored/requests/packages/charade/chardistribution.py rename to botocore/vendored/requests/packages/chardet/chardistribution.py index dfd3355e91..4e64a00bef 100644 --- a/botocore/vendored/requests/packages/charade/chardistribution.py +++ b/botocore/vendored/requests/packages/chardet/chardistribution.py @@ -1,231 +1,231 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, - EUCTW_TYPICAL_DISTRIBUTION_RATIO) -from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, - EUCKR_TYPICAL_DISTRIBUTION_RATIO) -from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, - GB2312_TYPICAL_DISTRIBUTION_RATIO) -from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, - BIG5_TYPICAL_DISTRIBUTION_RATIO) -from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, - JIS_TYPICAL_DISTRIBUTION_RATIO) -from .compat import wrap_ord - -ENOUGH_DATA_THRESHOLD = 1024 -SURE_YES = 0.99 -SURE_NO = 0.01 -MINIMUM_DATA_THRESHOLD = 3 - - -class CharDistributionAnalysis: - def __init__(self): - # Mapping table to get frequency order from char order (get from - # GetOrder()) - self._mCharToFreqOrder = None - self._mTableSize = None # Size of above table - # This is a constant value which varies from language to language, - # used in calculating confidence. See - # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html - # for further detail. - self._mTypicalDistributionRatio = None - self.reset() - - def reset(self): - """reset analyser, clear any state""" - # If this flag is set to True, detection is done and conclusion has - # been made - self._mDone = False - self._mTotalChars = 0 # Total characters encountered - # The number of characters whose frequency order is less than 512 - self._mFreqChars = 0 - - def feed(self, aBuf, aCharLen): - """feed a character with known length""" - if aCharLen == 2: - # we only care about 2-bytes character in our distribution analysis - order = self.get_order(aBuf) - else: - order = -1 - if order >= 0: - self._mTotalChars += 1 - # order is valid - if order < self._mTableSize: - if 512 > self._mCharToFreqOrder[order]: - self._mFreqChars += 1 - - def get_confidence(self): - """return confidence based on existing data""" - # if we didn't receive any character in our consideration range, - # return negative answer - if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: - return SURE_NO - - if self._mTotalChars != self._mFreqChars: - r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) - * self._mTypicalDistributionRatio)) - if r < SURE_YES: - return r - - # normalize confidence (we don't want to be 100% sure) - return SURE_YES - - def got_enough_data(self): - # It is not necessary to receive all data to draw conclusion. - # For charset detection, certain amount of data is enough - return self._mTotalChars > ENOUGH_DATA_THRESHOLD - - def get_order(self, aBuf): - # We do not handle characters based on the original encoding string, - # but convert this encoding string to a number, here called order. - # This allows multiple encodings of a language to share one frequency - # table. - return -1 - - -class EUCTWDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = EUCTWCharToFreqOrder - self._mTableSize = EUCTW_TABLE_SIZE - self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for euc-TW encoding, we are interested - # first byte range: 0xc4 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char = wrap_ord(aBuf[0]) - if first_char >= 0xC4: - return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 - else: - return -1 - - -class EUCKRDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = EUCKRCharToFreqOrder - self._mTableSize = EUCKR_TABLE_SIZE - self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for euc-KR encoding, we are interested - # first byte range: 0xb0 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char = wrap_ord(aBuf[0]) - if first_char >= 0xB0: - return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 - else: - return -1 - - -class GB2312DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = GB2312CharToFreqOrder - self._mTableSize = GB2312_TABLE_SIZE - self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for GB2312 encoding, we are interested - # first byte range: 0xb0 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) - if (first_char >= 0xB0) and (second_char >= 0xA1): - return 94 * (first_char - 0xB0) + second_char - 0xA1 - else: - return -1 - - -class Big5DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = Big5CharToFreqOrder - self._mTableSize = BIG5_TABLE_SIZE - self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for big5 encoding, we are interested - # first byte range: 0xa4 -- 0xfe - # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) - if first_char >= 0xA4: - if second_char >= 0xA1: - return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 - else: - return 157 * (first_char - 0xA4) + second_char - 0x40 - else: - return -1 - - -class SJISDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for sjis encoding, we are interested - # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe - # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe - # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) - if (first_char >= 0x81) and (first_char <= 0x9F): - order = 188 * (first_char - 0x81) - elif (first_char >= 0xE0) and (first_char <= 0xEF): - order = 188 * (first_char - 0xE0 + 31) - else: - return -1 - order = order + second_char - 0x40 - if second_char > 0x7F: - order = -1 - return order - - -class EUCJPDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for euc-JP encoding, we are interested - # first byte range: 0xa0 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - char = wrap_ord(aBuf[0]) - if char >= 0xA0: - return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 - else: - return -1 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, + EUCTW_TYPICAL_DISTRIBUTION_RATIO) +from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, + EUCKR_TYPICAL_DISTRIBUTION_RATIO) +from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, + GB2312_TYPICAL_DISTRIBUTION_RATIO) +from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, + BIG5_TYPICAL_DISTRIBUTION_RATIO) +from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, + JIS_TYPICAL_DISTRIBUTION_RATIO) +from .compat import wrap_ord + +ENOUGH_DATA_THRESHOLD = 1024 +SURE_YES = 0.99 +SURE_NO = 0.01 +MINIMUM_DATA_THRESHOLD = 3 + + +class CharDistributionAnalysis: + def __init__(self): + # Mapping table to get frequency order from char order (get from + # GetOrder()) + self._mCharToFreqOrder = None + self._mTableSize = None # Size of above table + # This is a constant value which varies from language to language, + # used in calculating confidence. See + # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html + # for further detail. + self._mTypicalDistributionRatio = None + self.reset() + + def reset(self): + """reset analyser, clear any state""" + # If this flag is set to True, detection is done and conclusion has + # been made + self._mDone = False + self._mTotalChars = 0 # Total characters encountered + # The number of characters whose frequency order is less than 512 + self._mFreqChars = 0 + + def feed(self, aBuf, aCharLen): + """feed a character with known length""" + if aCharLen == 2: + # we only care about 2-bytes character in our distribution analysis + order = self.get_order(aBuf) + else: + order = -1 + if order >= 0: + self._mTotalChars += 1 + # order is valid + if order < self._mTableSize: + if 512 > self._mCharToFreqOrder[order]: + self._mFreqChars += 1 + + def get_confidence(self): + """return confidence based on existing data""" + # if we didn't receive any character in our consideration range, + # return negative answer + if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: + return SURE_NO + + if self._mTotalChars != self._mFreqChars: + r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) + * self._mTypicalDistributionRatio)) + if r < SURE_YES: + return r + + # normalize confidence (we don't want to be 100% sure) + return SURE_YES + + def got_enough_data(self): + # It is not necessary to receive all data to draw conclusion. + # For charset detection, certain amount of data is enough + return self._mTotalChars > ENOUGH_DATA_THRESHOLD + + def get_order(self, aBuf): + # We do not handle characters based on the original encoding string, + # but convert this encoding string to a number, here called order. + # This allows multiple encodings of a language to share one frequency + # table. + return -1 + + +class EUCTWDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = EUCTWCharToFreqOrder + self._mTableSize = EUCTW_TABLE_SIZE + self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for euc-TW encoding, we are interested + # first byte range: 0xc4 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char = wrap_ord(aBuf[0]) + if first_char >= 0xC4: + return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 + else: + return -1 + + +class EUCKRDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = EUCKRCharToFreqOrder + self._mTableSize = EUCKR_TABLE_SIZE + self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for euc-KR encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char = wrap_ord(aBuf[0]) + if first_char >= 0xB0: + return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 + else: + return -1 + + +class GB2312DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = GB2312CharToFreqOrder + self._mTableSize = GB2312_TABLE_SIZE + self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for GB2312 encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + if (first_char >= 0xB0) and (second_char >= 0xA1): + return 94 * (first_char - 0xB0) + second_char - 0xA1 + else: + return -1 + + +class Big5DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = Big5CharToFreqOrder + self._mTableSize = BIG5_TABLE_SIZE + self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for big5 encoding, we are interested + # first byte range: 0xa4 -- 0xfe + # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + if first_char >= 0xA4: + if second_char >= 0xA1: + return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 + else: + return 157 * (first_char - 0xA4) + second_char - 0x40 + else: + return -1 + + +class SJISDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = JISCharToFreqOrder + self._mTableSize = JIS_TABLE_SIZE + self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for sjis encoding, we are interested + # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe + # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe + # no validation needed here. State machine has done that + first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + if (first_char >= 0x81) and (first_char <= 0x9F): + order = 188 * (first_char - 0x81) + elif (first_char >= 0xE0) and (first_char <= 0xEF): + order = 188 * (first_char - 0xE0 + 31) + else: + return -1 + order = order + second_char - 0x40 + if second_char > 0x7F: + order = -1 + return order + + +class EUCJPDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = JISCharToFreqOrder + self._mTableSize = JIS_TABLE_SIZE + self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for euc-JP encoding, we are interested + # first byte range: 0xa0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + char = wrap_ord(aBuf[0]) + if char >= 0xA0: + return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 + else: + return -1 diff --git a/botocore/vendored/requests/packages/charade/charsetgroupprober.py b/botocore/vendored/requests/packages/chardet/charsetgroupprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/charsetgroupprober.py rename to botocore/vendored/requests/packages/chardet/charsetgroupprober.py index 2959654748..85e7a1c67d 100644 --- a/botocore/vendored/requests/packages/charade/charsetgroupprober.py +++ b/botocore/vendored/requests/packages/chardet/charsetgroupprober.py @@ -1,106 +1,106 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -import sys -from .charsetprober import CharSetProber - - -class CharSetGroupProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mActiveNum = 0 - self._mProbers = [] - self._mBestGuessProber = None - - def reset(self): - CharSetProber.reset(self) - self._mActiveNum = 0 - for prober in self._mProbers: - if prober: - prober.reset() - prober.active = True - self._mActiveNum += 1 - self._mBestGuessProber = None - - def get_charset_name(self): - if not self._mBestGuessProber: - self.get_confidence() - if not self._mBestGuessProber: - return None -# self._mBestGuessProber = self._mProbers[0] - return self._mBestGuessProber.get_charset_name() - - def feed(self, aBuf): - for prober in self._mProbers: - if not prober: - continue - if not prober.active: - continue - st = prober.feed(aBuf) - if not st: - continue - if st == constants.eFoundIt: - self._mBestGuessProber = prober - return self.get_state() - elif st == constants.eNotMe: - prober.active = False - self._mActiveNum -= 1 - if self._mActiveNum <= 0: - self._mState = constants.eNotMe - return self.get_state() - return self.get_state() - - def get_confidence(self): - st = self.get_state() - if st == constants.eFoundIt: - return 0.99 - elif st == constants.eNotMe: - return 0.01 - bestConf = 0.0 - self._mBestGuessProber = None - for prober in self._mProbers: - if not prober: - continue - if not prober.active: - if constants._debug: - sys.stderr.write(prober.get_charset_name() - + ' not active\n') - continue - cf = prober.get_confidence() - if constants._debug: - sys.stderr.write('%s confidence = %s\n' % - (prober.get_charset_name(), cf)) - if bestConf < cf: - bestConf = cf - self._mBestGuessProber = prober - if not self._mBestGuessProber: - return 0.0 - return bestConf -# else: -# self._mBestGuessProber = self._mProbers[0] -# return self._mBestGuessProber.get_confidence() +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +import sys +from .charsetprober import CharSetProber + + +class CharSetGroupProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mActiveNum = 0 + self._mProbers = [] + self._mBestGuessProber = None + + def reset(self): + CharSetProber.reset(self) + self._mActiveNum = 0 + for prober in self._mProbers: + if prober: + prober.reset() + prober.active = True + self._mActiveNum += 1 + self._mBestGuessProber = None + + def get_charset_name(self): + if not self._mBestGuessProber: + self.get_confidence() + if not self._mBestGuessProber: + return None +# self._mBestGuessProber = self._mProbers[0] + return self._mBestGuessProber.get_charset_name() + + def feed(self, aBuf): + for prober in self._mProbers: + if not prober: + continue + if not prober.active: + continue + st = prober.feed(aBuf) + if not st: + continue + if st == constants.eFoundIt: + self._mBestGuessProber = prober + return self.get_state() + elif st == constants.eNotMe: + prober.active = False + self._mActiveNum -= 1 + if self._mActiveNum <= 0: + self._mState = constants.eNotMe + return self.get_state() + return self.get_state() + + def get_confidence(self): + st = self.get_state() + if st == constants.eFoundIt: + return 0.99 + elif st == constants.eNotMe: + return 0.01 + bestConf = 0.0 + self._mBestGuessProber = None + for prober in self._mProbers: + if not prober: + continue + if not prober.active: + if constants._debug: + sys.stderr.write(prober.get_charset_name() + + ' not active\n') + continue + cf = prober.get_confidence() + if constants._debug: + sys.stderr.write('%s confidence = %s\n' % + (prober.get_charset_name(), cf)) + if bestConf < cf: + bestConf = cf + self._mBestGuessProber = prober + if not self._mBestGuessProber: + return 0.0 + return bestConf +# else: +# self._mBestGuessProber = self._mProbers[0] +# return self._mBestGuessProber.get_confidence() diff --git a/botocore/vendored/requests/packages/charade/charsetprober.py b/botocore/vendored/requests/packages/chardet/charsetprober.py similarity index 100% rename from botocore/vendored/requests/packages/charade/charsetprober.py rename to botocore/vendored/requests/packages/chardet/charsetprober.py diff --git a/botocore/vendored/requests/packages/charade/codingstatemachine.py b/botocore/vendored/requests/packages/chardet/codingstatemachine.py similarity index 97% rename from botocore/vendored/requests/packages/charade/codingstatemachine.py rename to botocore/vendored/requests/packages/chardet/codingstatemachine.py index 1bda9ff162..8dd8c91798 100644 --- a/botocore/vendored/requests/packages/charade/codingstatemachine.py +++ b/botocore/vendored/requests/packages/chardet/codingstatemachine.py @@ -1,61 +1,61 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .constants import eStart -from .compat import wrap_ord - - -class CodingStateMachine: - def __init__(self, sm): - self._mModel = sm - self._mCurrentBytePos = 0 - self._mCurrentCharLen = 0 - self.reset() - - def reset(self): - self._mCurrentState = eStart - - def next_state(self, c): - # for each byte we get its class - # if it is first byte, we also get byte length - # PY3K: aBuf is a byte stream, so c is an int, not a byte - byteCls = self._mModel['classTable'][wrap_ord(c)] - if self._mCurrentState == eStart: - self._mCurrentBytePos = 0 - self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] - # from byte's class and stateTable, we get its next state - curr_state = (self._mCurrentState * self._mModel['classFactor'] - + byteCls) - self._mCurrentState = self._mModel['stateTable'][curr_state] - self._mCurrentBytePos += 1 - return self._mCurrentState - - def get_current_charlen(self): - return self._mCurrentCharLen - - def get_coding_state_machine(self): - return self._mModel['name'] +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart +from .compat import wrap_ord + + +class CodingStateMachine: + def __init__(self, sm): + self._mModel = sm + self._mCurrentBytePos = 0 + self._mCurrentCharLen = 0 + self.reset() + + def reset(self): + self._mCurrentState = eStart + + def next_state(self, c): + # for each byte we get its class + # if it is first byte, we also get byte length + # PY3K: aBuf is a byte stream, so c is an int, not a byte + byteCls = self._mModel['classTable'][wrap_ord(c)] + if self._mCurrentState == eStart: + self._mCurrentBytePos = 0 + self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] + # from byte's class and stateTable, we get its next state + curr_state = (self._mCurrentState * self._mModel['classFactor'] + + byteCls) + self._mCurrentState = self._mModel['stateTable'][curr_state] + self._mCurrentBytePos += 1 + return self._mCurrentState + + def get_current_charlen(self): + return self._mCurrentCharLen + + def get_coding_state_machine(self): + return self._mModel['name'] diff --git a/botocore/vendored/requests/packages/charade/compat.py b/botocore/vendored/requests/packages/chardet/compat.py similarity index 100% rename from botocore/vendored/requests/packages/charade/compat.py rename to botocore/vendored/requests/packages/chardet/compat.py diff --git a/botocore/vendored/requests/packages/charade/constants.py b/botocore/vendored/requests/packages/chardet/constants.py similarity index 97% rename from botocore/vendored/requests/packages/charade/constants.py rename to botocore/vendored/requests/packages/chardet/constants.py index a3d27de250..e4d148b3c5 100644 --- a/botocore/vendored/requests/packages/charade/constants.py +++ b/botocore/vendored/requests/packages/chardet/constants.py @@ -1,39 +1,39 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -_debug = 0 - -eDetecting = 0 -eFoundIt = 1 -eNotMe = 2 - -eStart = 0 -eError = 1 -eItsMe = 2 - -SHORTCUT_THRESHOLD = 0.95 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +_debug = 0 + +eDetecting = 0 +eFoundIt = 1 +eNotMe = 2 + +eStart = 0 +eError = 1 +eItsMe = 2 + +SHORTCUT_THRESHOLD = 0.95 diff --git a/botocore/vendored/requests/packages/charade/cp949prober.py b/botocore/vendored/requests/packages/chardet/cp949prober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/cp949prober.py rename to botocore/vendored/requests/packages/chardet/cp949prober.py index 543501fe09..ff4272f82a 100644 --- a/botocore/vendored/requests/packages/charade/cp949prober.py +++ b/botocore/vendored/requests/packages/chardet/cp949prober.py @@ -1,44 +1,44 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCKRDistributionAnalysis -from .mbcssm import CP949SMModel - - -class CP949Prober(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(CP949SMModel) - # NOTE: CP949 is a superset of EUC-KR, so the distribution should be - # not different. - self._mDistributionAnalyzer = EUCKRDistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "CP949" +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCKRDistributionAnalysis +from .mbcssm import CP949SMModel + + +class CP949Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(CP949SMModel) + # NOTE: CP949 is a superset of EUC-KR, so the distribution should be + # not different. + self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "CP949" diff --git a/botocore/vendored/requests/packages/charade/escprober.py b/botocore/vendored/requests/packages/chardet/escprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/escprober.py rename to botocore/vendored/requests/packages/chardet/escprober.py index 0063935ce6..80a844ff34 100644 --- a/botocore/vendored/requests/packages/charade/escprober.py +++ b/botocore/vendored/requests/packages/chardet/escprober.py @@ -1,86 +1,86 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, - ISO2022KRSMModel) -from .charsetprober import CharSetProber -from .codingstatemachine import CodingStateMachine -from .compat import wrap_ord - - -class EscCharSetProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mCodingSM = [ - CodingStateMachine(HZSMModel), - CodingStateMachine(ISO2022CNSMModel), - CodingStateMachine(ISO2022JPSMModel), - CodingStateMachine(ISO2022KRSMModel) - ] - self.reset() - - def reset(self): - CharSetProber.reset(self) - for codingSM in self._mCodingSM: - if not codingSM: - continue - codingSM.active = True - codingSM.reset() - self._mActiveSM = len(self._mCodingSM) - self._mDetectedCharset = None - - def get_charset_name(self): - return self._mDetectedCharset - - def get_confidence(self): - if self._mDetectedCharset: - return 0.99 - else: - return 0.00 - - def feed(self, aBuf): - for c in aBuf: - # PY3K: aBuf is a byte array, so c is an int, not a byte - for codingSM in self._mCodingSM: - if not codingSM: - continue - if not codingSM.active: - continue - codingState = codingSM.next_state(wrap_ord(c)) - if codingState == constants.eError: - codingSM.active = False - self._mActiveSM -= 1 - if self._mActiveSM <= 0: - self._mState = constants.eNotMe - return self.get_state() - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 - return self.get_state() - - return self.get_state() +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, + ISO2022KRSMModel) +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .compat import wrap_ord + + +class EscCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = [ + CodingStateMachine(HZSMModel), + CodingStateMachine(ISO2022CNSMModel), + CodingStateMachine(ISO2022JPSMModel), + CodingStateMachine(ISO2022KRSMModel) + ] + self.reset() + + def reset(self): + CharSetProber.reset(self) + for codingSM in self._mCodingSM: + if not codingSM: + continue + codingSM.active = True + codingSM.reset() + self._mActiveSM = len(self._mCodingSM) + self._mDetectedCharset = None + + def get_charset_name(self): + return self._mDetectedCharset + + def get_confidence(self): + if self._mDetectedCharset: + return 0.99 + else: + return 0.00 + + def feed(self, aBuf): + for c in aBuf: + # PY3K: aBuf is a byte array, so c is an int, not a byte + for codingSM in self._mCodingSM: + if not codingSM: + continue + if not codingSM.active: + continue + codingState = codingSM.next_state(wrap_ord(c)) + if codingState == constants.eError: + codingSM.active = False + self._mActiveSM -= 1 + if self._mActiveSM <= 0: + self._mState = constants.eNotMe + return self.get_state() + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 + return self.get_state() + + return self.get_state() diff --git a/botocore/vendored/requests/packages/charade/escsm.py b/botocore/vendored/requests/packages/chardet/escsm.py similarity index 97% rename from botocore/vendored/requests/packages/charade/escsm.py rename to botocore/vendored/requests/packages/chardet/escsm.py index 1cf3aa6db6..bd302b4c61 100644 --- a/botocore/vendored/requests/packages/charade/escsm.py +++ b/botocore/vendored/requests/packages/chardet/escsm.py @@ -1,242 +1,242 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .constants import eStart, eError, eItsMe - -HZ_cls = ( -1,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,0,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,4,0,5,2,0, # 78 - 7f -1,1,1,1,1,1,1,1, # 80 - 87 -1,1,1,1,1,1,1,1, # 88 - 8f -1,1,1,1,1,1,1,1, # 90 - 97 -1,1,1,1,1,1,1,1, # 98 - 9f -1,1,1,1,1,1,1,1, # a0 - a7 -1,1,1,1,1,1,1,1, # a8 - af -1,1,1,1,1,1,1,1, # b0 - b7 -1,1,1,1,1,1,1,1, # b8 - bf -1,1,1,1,1,1,1,1, # c0 - c7 -1,1,1,1,1,1,1,1, # c8 - cf -1,1,1,1,1,1,1,1, # d0 - d7 -1,1,1,1,1,1,1,1, # d8 - df -1,1,1,1,1,1,1,1, # e0 - e7 -1,1,1,1,1,1,1,1, # e8 - ef -1,1,1,1,1,1,1,1, # f0 - f7 -1,1,1,1,1,1,1,1, # f8 - ff -) - -HZ_st = ( -eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f -eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 - 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f - 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 - 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f -) - -HZCharLenTable = (0, 0, 0, 0, 0, 0) - -HZSMModel = {'classTable': HZ_cls, - 'classFactor': 6, - 'stateTable': HZ_st, - 'charLenTable': HZCharLenTable, - 'name': "HZ-GB-2312"} - -ISO2022CN_cls = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,3,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,4,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022CN_st = ( -eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 -eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f -eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 -eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 - 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 -eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f -) - -ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) - -ISO2022CNSMModel = {'classTable': ISO2022CN_cls, - 'classFactor': 9, - 'stateTable': ISO2022CN_st, - 'charLenTable': ISO2022CNCharLenTable, - 'name': "ISO-2022-CN"} - -ISO2022JP_cls = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,2,2, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,7,0,0,0, # 20 - 27 -3,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -6,0,4,0,8,0,0,0, # 40 - 47 -0,9,5,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022JP_st = ( -eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 -eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 -eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f -eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 -eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f -eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f -eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 -) - -ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - -ISO2022JPSMModel = {'classTable': ISO2022JP_cls, - 'classFactor': 10, - 'stateTable': ISO2022JP_st, - 'charLenTable': ISO2022JPCharLenTable, - 'name': "ISO-2022-JP"} - -ISO2022KR_cls = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,3,0,0,0, # 20 - 27 -0,4,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,5,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022KR_st = ( -eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f -eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 -eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f -eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 -) - -ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) - -ISO2022KRSMModel = {'classTable': ISO2022KR_cls, - 'classFactor': 6, - 'stateTable': ISO2022KR_st, - 'charLenTable': ISO2022KRCharLenTable, - 'name': "ISO-2022-KR"} - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart, eError, eItsMe + +HZ_cls = ( +1,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,0,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,4,0,5,2,0, # 78 - 7f +1,1,1,1,1,1,1,1, # 80 - 87 +1,1,1,1,1,1,1,1, # 88 - 8f +1,1,1,1,1,1,1,1, # 90 - 97 +1,1,1,1,1,1,1,1, # 98 - 9f +1,1,1,1,1,1,1,1, # a0 - a7 +1,1,1,1,1,1,1,1, # a8 - af +1,1,1,1,1,1,1,1, # b0 - b7 +1,1,1,1,1,1,1,1, # b8 - bf +1,1,1,1,1,1,1,1, # c0 - c7 +1,1,1,1,1,1,1,1, # c8 - cf +1,1,1,1,1,1,1,1, # d0 - d7 +1,1,1,1,1,1,1,1, # d8 - df +1,1,1,1,1,1,1,1, # e0 - e7 +1,1,1,1,1,1,1,1, # e8 - ef +1,1,1,1,1,1,1,1, # f0 - f7 +1,1,1,1,1,1,1,1, # f8 - ff +) + +HZ_st = ( +eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 + 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f + 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 + 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f +) + +HZCharLenTable = (0, 0, 0, 0, 0, 0) + +HZSMModel = {'classTable': HZ_cls, + 'classFactor': 6, + 'stateTable': HZ_st, + 'charLenTable': HZCharLenTable, + 'name': "HZ-GB-2312"} + +ISO2022CN_cls = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,3,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,4,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022CN_st = ( +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 + 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 +eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f +) + +ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022CNSMModel = {'classTable': ISO2022CN_cls, + 'classFactor': 9, + 'stateTable': ISO2022CN_st, + 'charLenTable': ISO2022CNCharLenTable, + 'name': "ISO-2022-CN"} + +ISO2022JP_cls = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,2,2, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,7,0,0,0, # 20 - 27 +3,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +6,0,4,0,8,0,0,0, # 40 - 47 +0,9,5,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022JP_st = ( +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f +eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 +eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f +eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f +eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 +) + +ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022JPSMModel = {'classTable': ISO2022JP_cls, + 'classFactor': 10, + 'stateTable': ISO2022JP_st, + 'charLenTable': ISO2022JPCharLenTable, + 'name': "ISO-2022-JP"} + +ISO2022KR_cls = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,3,0,0,0, # 20 - 27 +0,4,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,5,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022KR_st = ( +eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 +eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f +eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 +) + +ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) + +ISO2022KRSMModel = {'classTable': ISO2022KR_cls, + 'classFactor': 6, + 'stateTable': ISO2022KR_st, + 'charLenTable': ISO2022KRCharLenTable, + 'name': "ISO-2022-KR"} + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/eucjpprober.py b/botocore/vendored/requests/packages/chardet/eucjpprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/eucjpprober.py rename to botocore/vendored/requests/packages/chardet/eucjpprober.py index d70cfbbb01..8e64fdcc26 100644 --- a/botocore/vendored/requests/packages/charade/eucjpprober.py +++ b/botocore/vendored/requests/packages/chardet/eucjpprober.py @@ -1,90 +1,90 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from . import constants -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCJPDistributionAnalysis -from .jpcntx import EUCJPContextAnalysis -from .mbcssm import EUCJPSMModel - - -class EUCJPProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCJPSMModel) - self._mDistributionAnalyzer = EUCJPDistributionAnalysis() - self._mContextAnalyzer = EUCJPContextAnalysis() - self.reset() - - def reset(self): - MultiByteCharSetProber.reset(self) - self._mContextAnalyzer.reset() - - def get_charset_name(self): - return "EUC-JP" - - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() - if i == 0: - self._mLastChar[1] = aBuf[0] - self._mContextAnalyzer.feed(self._mLastChar, charLen) - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) - else: - self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) - - self._mLastChar[0] = aBuf[aLen - 1] - - if self.get_state() == constants.eDetecting: - if (self._mContextAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - contxtCf = self._mContextAnalyzer.get_confidence() - distribCf = self._mDistributionAnalyzer.get_confidence() - return max(contxtCf, distribCf) +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from . import constants +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCJPDistributionAnalysis +from .jpcntx import EUCJPContextAnalysis +from .mbcssm import EUCJPSMModel + + +class EUCJPProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCJPSMModel) + self._mDistributionAnalyzer = EUCJPDistributionAnalysis() + self._mContextAnalyzer = EUCJPContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return "EUC-JP" + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == constants.eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + + ' prober hit error at byte ' + str(i) + + '\n') + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar, charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) + self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], + charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if (self._mContextAnalyzer.got_enough_data() and + (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/botocore/vendored/requests/packages/charade/euckrfreq.py b/botocore/vendored/requests/packages/chardet/euckrfreq.py similarity index 100% rename from botocore/vendored/requests/packages/charade/euckrfreq.py rename to botocore/vendored/requests/packages/chardet/euckrfreq.py diff --git a/botocore/vendored/requests/packages/charade/euckrprober.py b/botocore/vendored/requests/packages/chardet/euckrprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/euckrprober.py rename to botocore/vendored/requests/packages/chardet/euckrprober.py index def3e42902..5982a46b60 100644 --- a/botocore/vendored/requests/packages/charade/euckrprober.py +++ b/botocore/vendored/requests/packages/chardet/euckrprober.py @@ -1,42 +1,42 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCKRDistributionAnalysis -from .mbcssm import EUCKRSMModel - - -class EUCKRProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCKRSMModel) - self._mDistributionAnalyzer = EUCKRDistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "EUC-KR" +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCKRDistributionAnalysis +from .mbcssm import EUCKRSMModel + + +class EUCKRProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCKRSMModel) + self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-KR" diff --git a/botocore/vendored/requests/packages/charade/euctwfreq.py b/botocore/vendored/requests/packages/chardet/euctwfreq.py similarity index 100% rename from botocore/vendored/requests/packages/charade/euctwfreq.py rename to botocore/vendored/requests/packages/chardet/euctwfreq.py diff --git a/botocore/vendored/requests/packages/charade/euctwprober.py b/botocore/vendored/requests/packages/chardet/euctwprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/euctwprober.py rename to botocore/vendored/requests/packages/chardet/euctwprober.py index e601adfdc6..fe652fe37a 100644 --- a/botocore/vendored/requests/packages/charade/euctwprober.py +++ b/botocore/vendored/requests/packages/chardet/euctwprober.py @@ -1,41 +1,41 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCTWDistributionAnalysis -from .mbcssm import EUCTWSMModel - -class EUCTWProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCTWSMModel) - self._mDistributionAnalyzer = EUCTWDistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "EUC-TW" +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCTWDistributionAnalysis +from .mbcssm import EUCTWSMModel + +class EUCTWProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCTWSMModel) + self._mDistributionAnalyzer = EUCTWDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-TW" diff --git a/botocore/vendored/requests/packages/charade/gb2312freq.py b/botocore/vendored/requests/packages/chardet/gb2312freq.py similarity index 100% rename from botocore/vendored/requests/packages/charade/gb2312freq.py rename to botocore/vendored/requests/packages/chardet/gb2312freq.py diff --git a/botocore/vendored/requests/packages/charade/gb2312prober.py b/botocore/vendored/requests/packages/chardet/gb2312prober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/gb2312prober.py rename to botocore/vendored/requests/packages/chardet/gb2312prober.py index 643fe2519e..0325a2d861 100644 --- a/botocore/vendored/requests/packages/charade/gb2312prober.py +++ b/botocore/vendored/requests/packages/chardet/gb2312prober.py @@ -1,41 +1,41 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import GB2312DistributionAnalysis -from .mbcssm import GB2312SMModel - -class GB2312Prober(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(GB2312SMModel) - self._mDistributionAnalyzer = GB2312DistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "GB2312" +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import GB2312DistributionAnalysis +from .mbcssm import GB2312SMModel + +class GB2312Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(GB2312SMModel) + self._mDistributionAnalyzer = GB2312DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "GB2312" diff --git a/botocore/vendored/requests/packages/charade/hebrewprober.py b/botocore/vendored/requests/packages/chardet/hebrewprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/hebrewprober.py rename to botocore/vendored/requests/packages/chardet/hebrewprober.py index 90d171f302..ba225c5ef4 100644 --- a/botocore/vendored/requests/packages/charade/hebrewprober.py +++ b/botocore/vendored/requests/packages/chardet/hebrewprober.py @@ -1,283 +1,283 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Shy Shalom -# Portions created by the Initial Developer are Copyright (C) 2005 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetprober import CharSetProber -from .constants import eNotMe, eDetecting -from .compat import wrap_ord - -# This prober doesn't actually recognize a language or a charset. -# It is a helper prober for the use of the Hebrew model probers - -### General ideas of the Hebrew charset recognition ### -# -# Four main charsets exist in Hebrew: -# "ISO-8859-8" - Visual Hebrew -# "windows-1255" - Logical Hebrew -# "ISO-8859-8-I" - Logical Hebrew -# "x-mac-hebrew" - ?? Logical Hebrew ?? -# -# Both "ISO" charsets use a completely identical set of code points, whereas -# "windows-1255" and "x-mac-hebrew" are two different proper supersets of -# these code points. windows-1255 defines additional characters in the range -# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific -# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. -# x-mac-hebrew defines similar additional code points but with a different -# mapping. -# -# As far as an average Hebrew text with no diacritics is concerned, all four -# charsets are identical with respect to code points. Meaning that for the -# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters -# (including final letters). -# -# The dominant difference between these charsets is their directionality. -# "Visual" directionality means that the text is ordered as if the renderer is -# not aware of a BIDI rendering algorithm. The renderer sees the text and -# draws it from left to right. The text itself when ordered naturally is read -# backwards. A buffer of Visual Hebrew generally looks like so: -# "[last word of first line spelled backwards] [whole line ordered backwards -# and spelled backwards] [first word of first line spelled backwards] -# [end of line] [last word of second line] ... etc' " -# adding punctuation marks, numbers and English text to visual text is -# naturally also "visual" and from left to right. -# -# "Logical" directionality means the text is ordered "naturally" according to -# the order it is read. It is the responsibility of the renderer to display -# the text from right to left. A BIDI algorithm is used to place general -# punctuation marks, numbers and English text in the text. -# -# Texts in x-mac-hebrew are almost impossible to find on the Internet. From -# what little evidence I could find, it seems that its general directionality -# is Logical. -# -# To sum up all of the above, the Hebrew probing mechanism knows about two -# charsets: -# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are -# backwards while line order is natural. For charset recognition purposes -# the line order is unimportant (In fact, for this implementation, even -# word order is unimportant). -# Logical Hebrew - "windows-1255" - normal, naturally ordered text. -# -# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be -# specifically identified. -# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew -# that contain special punctuation marks or diacritics is displayed with -# some unconverted characters showing as question marks. This problem might -# be corrected using another model prober for x-mac-hebrew. Due to the fact -# that x-mac-hebrew texts are so rare, writing another model prober isn't -# worth the effort and performance hit. -# -#### The Prober #### -# -# The prober is divided between two SBCharSetProbers and a HebrewProber, -# all of which are managed, created, fed data, inquired and deleted by the -# SBCSGroupProber. The two SBCharSetProbers identify that the text is in -# fact some kind of Hebrew, Logical or Visual. The final decision about which -# one is it is made by the HebrewProber by combining final-letter scores -# with the scores of the two SBCharSetProbers to produce a final answer. -# -# The SBCSGroupProber is responsible for stripping the original text of HTML -# tags, English characters, numbers, low-ASCII punctuation characters, spaces -# and new lines. It reduces any sequence of such characters to a single space. -# The buffer fed to each prober in the SBCS group prober is pure text in -# high-ASCII. -# The two SBCharSetProbers (model probers) share the same language model: -# Win1255Model. -# The first SBCharSetProber uses the model normally as any other -# SBCharSetProber does, to recognize windows-1255, upon which this model was -# built. The second SBCharSetProber is told to make the pair-of-letter -# lookup in the language model backwards. This in practice exactly simulates -# a visual Hebrew model using the windows-1255 logical Hebrew model. -# -# The HebrewProber is not using any language model. All it does is look for -# final-letter evidence suggesting the text is either logical Hebrew or visual -# Hebrew. Disjointed from the model probers, the results of the HebrewProber -# alone are meaningless. HebrewProber always returns 0.00 as confidence -# since it never identifies a charset by itself. Instead, the pointer to the -# HebrewProber is passed to the model probers as a helper "Name Prober". -# When the Group prober receives a positive identification from any prober, -# it asks for the name of the charset identified. If the prober queried is a -# Hebrew model prober, the model prober forwards the call to the -# HebrewProber to make the final decision. In the HebrewProber, the -# decision is made according to the final-letters scores maintained and Both -# model probers scores. The answer is returned in the form of the name of the -# charset identified, either "windows-1255" or "ISO-8859-8". - -# windows-1255 / ISO-8859-8 code points of interest -FINAL_KAF = 0xea -NORMAL_KAF = 0xeb -FINAL_MEM = 0xed -NORMAL_MEM = 0xee -FINAL_NUN = 0xef -NORMAL_NUN = 0xf0 -FINAL_PE = 0xf3 -NORMAL_PE = 0xf4 -FINAL_TSADI = 0xf5 -NORMAL_TSADI = 0xf6 - -# Minimum Visual vs Logical final letter score difference. -# If the difference is below this, don't rely solely on the final letter score -# distance. -MIN_FINAL_CHAR_DISTANCE = 5 - -# Minimum Visual vs Logical model score difference. -# If the difference is below this, don't rely at all on the model score -# distance. -MIN_MODEL_DISTANCE = 0.01 - -VISUAL_HEBREW_NAME = "ISO-8859-8" -LOGICAL_HEBREW_NAME = "windows-1255" - - -class HebrewProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mLogicalProber = None - self._mVisualProber = None - self.reset() - - def reset(self): - self._mFinalCharLogicalScore = 0 - self._mFinalCharVisualScore = 0 - # The two last characters seen in the previous buffer, - # mPrev and mBeforePrev are initialized to space in order to simulate - # a word delimiter at the beginning of the data - self._mPrev = ' ' - self._mBeforePrev = ' ' - # These probers are owned by the group prober. - - def set_model_probers(self, logicalProber, visualProber): - self._mLogicalProber = logicalProber - self._mVisualProber = visualProber - - def is_final(self, c): - return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, - FINAL_TSADI] - - def is_non_final(self, c): - # The normal Tsadi is not a good Non-Final letter due to words like - # 'lechotet' (to chat) containing an apostrophe after the tsadi. This - # apostrophe is converted to a space in FilterWithoutEnglishLetters - # causing the Non-Final tsadi to appear at an end of a word even - # though this is not the case in the original text. - # The letters Pe and Kaf rarely display a related behavior of not being - # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' - # for example legally end with a Non-Final Pe or Kaf. However, the - # benefit of these letters as Non-Final letters outweighs the damage - # since these words are quite rare. - return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] - - def feed(self, aBuf): - # Final letter analysis for logical-visual decision. - # Look for evidence that the received buffer is either logical Hebrew - # or visual Hebrew. - # The following cases are checked: - # 1) A word longer than 1 letter, ending with a final letter. This is - # an indication that the text is laid out "naturally" since the - # final letter really appears at the end. +1 for logical score. - # 2) A word longer than 1 letter, ending with a Non-Final letter. In - # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, - # should not end with the Non-Final form of that letter. Exceptions - # to this rule are mentioned above in isNonFinal(). This is an - # indication that the text is laid out backwards. +1 for visual - # score - # 3) A word longer than 1 letter, starting with a final letter. Final - # letters should not appear at the beginning of a word. This is an - # indication that the text is laid out backwards. +1 for visual - # score. - # - # The visual score and logical score are accumulated throughout the - # text and are finally checked against each other in GetCharSetName(). - # No checking for final letters in the middle of words is done since - # that case is not an indication for either Logical or Visual text. - # - # We automatically filter out all 7-bit characters (replace them with - # spaces) so the word boundary detection works properly. [MAP] - - if self.get_state() == eNotMe: - # Both model probers say it's not them. No reason to continue. - return eNotMe - - aBuf = self.filter_high_bit_only(aBuf) - - for cur in aBuf: - if cur == ' ': - # We stand on a space - a word just ended - if self._mBeforePrev != ' ': - # next-to-last char was not a space so self._mPrev is not a - # 1 letter word - if self.is_final(self._mPrev): - # case (1) [-2:not space][-1:final letter][cur:space] - self._mFinalCharLogicalScore += 1 - elif self.is_non_final(self._mPrev): - # case (2) [-2:not space][-1:Non-Final letter][ - # cur:space] - self._mFinalCharVisualScore += 1 - else: - # Not standing on a space - if ((self._mBeforePrev == ' ') and - (self.is_final(self._mPrev)) and (cur != ' ')): - # case (3) [-2:space][-1:final letter][cur:not space] - self._mFinalCharVisualScore += 1 - self._mBeforePrev = self._mPrev - self._mPrev = cur - - # Forever detecting, till the end or until both model probers return - # eNotMe (handled above) - return eDetecting - - def get_charset_name(self): - # Make the decision: is it Logical or Visual? - # If the final letter score distance is dominant enough, rely on it. - finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore - if finalsub >= MIN_FINAL_CHAR_DISTANCE: - return LOGICAL_HEBREW_NAME - if finalsub <= -MIN_FINAL_CHAR_DISTANCE: - return VISUAL_HEBREW_NAME - - # It's not dominant enough, try to rely on the model scores instead. - modelsub = (self._mLogicalProber.get_confidence() - - self._mVisualProber.get_confidence()) - if modelsub > MIN_MODEL_DISTANCE: - return LOGICAL_HEBREW_NAME - if modelsub < -MIN_MODEL_DISTANCE: - return VISUAL_HEBREW_NAME - - # Still no good, back to final letter distance, maybe it'll save the - # day. - if finalsub < 0.0: - return VISUAL_HEBREW_NAME - - # (finalsub > 0 - Logical) or (don't know what to do) default to - # Logical. - return LOGICAL_HEBREW_NAME - - def get_state(self): - # Remain active as long as any of the model probers are active. - if (self._mLogicalProber.get_state() == eNotMe) and \ - (self._mVisualProber.get_state() == eNotMe): - return eNotMe - return eDetecting +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Shy Shalom +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .constants import eNotMe, eDetecting +from .compat import wrap_ord + +# This prober doesn't actually recognize a language or a charset. +# It is a helper prober for the use of the Hebrew model probers + +### General ideas of the Hebrew charset recognition ### +# +# Four main charsets exist in Hebrew: +# "ISO-8859-8" - Visual Hebrew +# "windows-1255" - Logical Hebrew +# "ISO-8859-8-I" - Logical Hebrew +# "x-mac-hebrew" - ?? Logical Hebrew ?? +# +# Both "ISO" charsets use a completely identical set of code points, whereas +# "windows-1255" and "x-mac-hebrew" are two different proper supersets of +# these code points. windows-1255 defines additional characters in the range +# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific +# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. +# x-mac-hebrew defines similar additional code points but with a different +# mapping. +# +# As far as an average Hebrew text with no diacritics is concerned, all four +# charsets are identical with respect to code points. Meaning that for the +# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters +# (including final letters). +# +# The dominant difference between these charsets is their directionality. +# "Visual" directionality means that the text is ordered as if the renderer is +# not aware of a BIDI rendering algorithm. The renderer sees the text and +# draws it from left to right. The text itself when ordered naturally is read +# backwards. A buffer of Visual Hebrew generally looks like so: +# "[last word of first line spelled backwards] [whole line ordered backwards +# and spelled backwards] [first word of first line spelled backwards] +# [end of line] [last word of second line] ... etc' " +# adding punctuation marks, numbers and English text to visual text is +# naturally also "visual" and from left to right. +# +# "Logical" directionality means the text is ordered "naturally" according to +# the order it is read. It is the responsibility of the renderer to display +# the text from right to left. A BIDI algorithm is used to place general +# punctuation marks, numbers and English text in the text. +# +# Texts in x-mac-hebrew are almost impossible to find on the Internet. From +# what little evidence I could find, it seems that its general directionality +# is Logical. +# +# To sum up all of the above, the Hebrew probing mechanism knows about two +# charsets: +# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are +# backwards while line order is natural. For charset recognition purposes +# the line order is unimportant (In fact, for this implementation, even +# word order is unimportant). +# Logical Hebrew - "windows-1255" - normal, naturally ordered text. +# +# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be +# specifically identified. +# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew +# that contain special punctuation marks or diacritics is displayed with +# some unconverted characters showing as question marks. This problem might +# be corrected using another model prober for x-mac-hebrew. Due to the fact +# that x-mac-hebrew texts are so rare, writing another model prober isn't +# worth the effort and performance hit. +# +#### The Prober #### +# +# The prober is divided between two SBCharSetProbers and a HebrewProber, +# all of which are managed, created, fed data, inquired and deleted by the +# SBCSGroupProber. The two SBCharSetProbers identify that the text is in +# fact some kind of Hebrew, Logical or Visual. The final decision about which +# one is it is made by the HebrewProber by combining final-letter scores +# with the scores of the two SBCharSetProbers to produce a final answer. +# +# The SBCSGroupProber is responsible for stripping the original text of HTML +# tags, English characters, numbers, low-ASCII punctuation characters, spaces +# and new lines. It reduces any sequence of such characters to a single space. +# The buffer fed to each prober in the SBCS group prober is pure text in +# high-ASCII. +# The two SBCharSetProbers (model probers) share the same language model: +# Win1255Model. +# The first SBCharSetProber uses the model normally as any other +# SBCharSetProber does, to recognize windows-1255, upon which this model was +# built. The second SBCharSetProber is told to make the pair-of-letter +# lookup in the language model backwards. This in practice exactly simulates +# a visual Hebrew model using the windows-1255 logical Hebrew model. +# +# The HebrewProber is not using any language model. All it does is look for +# final-letter evidence suggesting the text is either logical Hebrew or visual +# Hebrew. Disjointed from the model probers, the results of the HebrewProber +# alone are meaningless. HebrewProber always returns 0.00 as confidence +# since it never identifies a charset by itself. Instead, the pointer to the +# HebrewProber is passed to the model probers as a helper "Name Prober". +# When the Group prober receives a positive identification from any prober, +# it asks for the name of the charset identified. If the prober queried is a +# Hebrew model prober, the model prober forwards the call to the +# HebrewProber to make the final decision. In the HebrewProber, the +# decision is made according to the final-letters scores maintained and Both +# model probers scores. The answer is returned in the form of the name of the +# charset identified, either "windows-1255" or "ISO-8859-8". + +# windows-1255 / ISO-8859-8 code points of interest +FINAL_KAF = 0xea +NORMAL_KAF = 0xeb +FINAL_MEM = 0xed +NORMAL_MEM = 0xee +FINAL_NUN = 0xef +NORMAL_NUN = 0xf0 +FINAL_PE = 0xf3 +NORMAL_PE = 0xf4 +FINAL_TSADI = 0xf5 +NORMAL_TSADI = 0xf6 + +# Minimum Visual vs Logical final letter score difference. +# If the difference is below this, don't rely solely on the final letter score +# distance. +MIN_FINAL_CHAR_DISTANCE = 5 + +# Minimum Visual vs Logical model score difference. +# If the difference is below this, don't rely at all on the model score +# distance. +MIN_MODEL_DISTANCE = 0.01 + +VISUAL_HEBREW_NAME = "ISO-8859-8" +LOGICAL_HEBREW_NAME = "windows-1255" + + +class HebrewProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mLogicalProber = None + self._mVisualProber = None + self.reset() + + def reset(self): + self._mFinalCharLogicalScore = 0 + self._mFinalCharVisualScore = 0 + # The two last characters seen in the previous buffer, + # mPrev and mBeforePrev are initialized to space in order to simulate + # a word delimiter at the beginning of the data + self._mPrev = ' ' + self._mBeforePrev = ' ' + # These probers are owned by the group prober. + + def set_model_probers(self, logicalProber, visualProber): + self._mLogicalProber = logicalProber + self._mVisualProber = visualProber + + def is_final(self, c): + return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, + FINAL_TSADI] + + def is_non_final(self, c): + # The normal Tsadi is not a good Non-Final letter due to words like + # 'lechotet' (to chat) containing an apostrophe after the tsadi. This + # apostrophe is converted to a space in FilterWithoutEnglishLetters + # causing the Non-Final tsadi to appear at an end of a word even + # though this is not the case in the original text. + # The letters Pe and Kaf rarely display a related behavior of not being + # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' + # for example legally end with a Non-Final Pe or Kaf. However, the + # benefit of these letters as Non-Final letters outweighs the damage + # since these words are quite rare. + return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] + + def feed(self, aBuf): + # Final letter analysis for logical-visual decision. + # Look for evidence that the received buffer is either logical Hebrew + # or visual Hebrew. + # The following cases are checked: + # 1) A word longer than 1 letter, ending with a final letter. This is + # an indication that the text is laid out "naturally" since the + # final letter really appears at the end. +1 for logical score. + # 2) A word longer than 1 letter, ending with a Non-Final letter. In + # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, + # should not end with the Non-Final form of that letter. Exceptions + # to this rule are mentioned above in isNonFinal(). This is an + # indication that the text is laid out backwards. +1 for visual + # score + # 3) A word longer than 1 letter, starting with a final letter. Final + # letters should not appear at the beginning of a word. This is an + # indication that the text is laid out backwards. +1 for visual + # score. + # + # The visual score and logical score are accumulated throughout the + # text and are finally checked against each other in GetCharSetName(). + # No checking for final letters in the middle of words is done since + # that case is not an indication for either Logical or Visual text. + # + # We automatically filter out all 7-bit characters (replace them with + # spaces) so the word boundary detection works properly. [MAP] + + if self.get_state() == eNotMe: + # Both model probers say it's not them. No reason to continue. + return eNotMe + + aBuf = self.filter_high_bit_only(aBuf) + + for cur in aBuf: + if cur == ' ': + # We stand on a space - a word just ended + if self._mBeforePrev != ' ': + # next-to-last char was not a space so self._mPrev is not a + # 1 letter word + if self.is_final(self._mPrev): + # case (1) [-2:not space][-1:final letter][cur:space] + self._mFinalCharLogicalScore += 1 + elif self.is_non_final(self._mPrev): + # case (2) [-2:not space][-1:Non-Final letter][ + # cur:space] + self._mFinalCharVisualScore += 1 + else: + # Not standing on a space + if ((self._mBeforePrev == ' ') and + (self.is_final(self._mPrev)) and (cur != ' ')): + # case (3) [-2:space][-1:final letter][cur:not space] + self._mFinalCharVisualScore += 1 + self._mBeforePrev = self._mPrev + self._mPrev = cur + + # Forever detecting, till the end or until both model probers return + # eNotMe (handled above) + return eDetecting + + def get_charset_name(self): + # Make the decision: is it Logical or Visual? + # If the final letter score distance is dominant enough, rely on it. + finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore + if finalsub >= MIN_FINAL_CHAR_DISTANCE: + return LOGICAL_HEBREW_NAME + if finalsub <= -MIN_FINAL_CHAR_DISTANCE: + return VISUAL_HEBREW_NAME + + # It's not dominant enough, try to rely on the model scores instead. + modelsub = (self._mLogicalProber.get_confidence() + - self._mVisualProber.get_confidence()) + if modelsub > MIN_MODEL_DISTANCE: + return LOGICAL_HEBREW_NAME + if modelsub < -MIN_MODEL_DISTANCE: + return VISUAL_HEBREW_NAME + + # Still no good, back to final letter distance, maybe it'll save the + # day. + if finalsub < 0.0: + return VISUAL_HEBREW_NAME + + # (finalsub > 0 - Logical) or (don't know what to do) default to + # Logical. + return LOGICAL_HEBREW_NAME + + def get_state(self): + # Remain active as long as any of the model probers are active. + if (self._mLogicalProber.get_state() == eNotMe) and \ + (self._mVisualProber.get_state() == eNotMe): + return eNotMe + return eDetecting diff --git a/botocore/vendored/requests/packages/charade/jisfreq.py b/botocore/vendored/requests/packages/chardet/jisfreq.py similarity index 100% rename from botocore/vendored/requests/packages/charade/jisfreq.py rename to botocore/vendored/requests/packages/chardet/jisfreq.py diff --git a/botocore/vendored/requests/packages/charade/jpcntx.py b/botocore/vendored/requests/packages/chardet/jpcntx.py similarity index 98% rename from botocore/vendored/requests/packages/charade/jpcntx.py rename to botocore/vendored/requests/packages/chardet/jpcntx.py index e4e9e4da51..59aeb6a878 100644 --- a/botocore/vendored/requests/packages/charade/jpcntx.py +++ b/botocore/vendored/requests/packages/chardet/jpcntx.py @@ -1,219 +1,227 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .compat import wrap_ord - -NUM_OF_CATEGORY = 6 -DONT_KNOW = -1 -ENOUGH_REL_THRESHOLD = 100 -MAX_REL_THRESHOLD = 1000 -MINIMUM_DATA_THRESHOLD = 4 - -# This is hiragana 2-char sequence table, the number in each cell represents its frequency category -jp2CharContext = ( -(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), -(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), -(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), -(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), -(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), -(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), -(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), -(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), -(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), -(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), -(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), -(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), -(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), -(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), -(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), -(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), -(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), -(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), -(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), -(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), -(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), -(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), -(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), -(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), -(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), -(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), -(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), -(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), -(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), -(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), -(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), -(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), -(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), -(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), -(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), -(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), -(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), -(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), -(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), -(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), -(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), -(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), -(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), -(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), -(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), -(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), -(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), -(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), -(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), -(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), -(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), -(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), -(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), -(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), -(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), -(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), -(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), -(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), -(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), -(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), -(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), -(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), -(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), -(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), -(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), -(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), -(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), -(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), -(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), -(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), -(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), -(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), -(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), -(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), -) - -class JapaneseContextAnalysis: - def __init__(self): - self.reset() - - def reset(self): - self._mTotalRel = 0 # total sequence received - # category counters, each interger counts sequence in its category - self._mRelSample = [0] * NUM_OF_CATEGORY - # if last byte in current buffer is not the last byte of a character, - # we need to know how many bytes to skip in next buffer - self._mNeedToSkipCharNum = 0 - self._mLastCharOrder = -1 # The order of previous char - # If this flag is set to True, detection is done and conclusion has - # been made - self._mDone = False - - def feed(self, aBuf, aLen): - if self._mDone: - return - - # The buffer we got is byte oriented, and a character may span in more than one - # buffers. In case the last one or two byte in last buffer is not - # complete, we record how many byte needed to complete that character - # and skip these bytes here. We can choose to record those bytes as - # well and analyse the character once it is complete, but since a - # character will not make much difference, by simply skipping - # this character will simply our logic and improve performance. - i = self._mNeedToSkipCharNum - while i < aLen: - order, charLen = self.get_order(aBuf[i:i + 2]) - i += charLen - if i > aLen: - self._mNeedToSkipCharNum = i - aLen - self._mLastCharOrder = -1 - else: - if (order != -1) and (self._mLastCharOrder != -1): - self._mTotalRel += 1 - if self._mTotalRel > MAX_REL_THRESHOLD: - self._mDone = True - break - self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1 - self._mLastCharOrder = order - - def got_enough_data(self): - return self._mTotalRel > ENOUGH_REL_THRESHOLD - - def get_confidence(self): - # This is just one way to calculate confidence. It works well for me. - if self._mTotalRel > MINIMUM_DATA_THRESHOLD: - return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel - else: - return DONT_KNOW - - def get_order(self, aBuf): - return -1, 1 - -class SJISContextAnalysis(JapaneseContextAnalysis): - def get_order(self, aBuf): - if not aBuf: - return -1, 1 - # find out current char's byte length - first_char = wrap_ord(aBuf[0]) - if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): - charLen = 2 - else: - charLen = 1 - - # return its order if it is hiragana - if len(aBuf) > 1: - second_char = wrap_ord(aBuf[1]) - if (first_char == 202) and (0x9F <= second_char <= 0xF1): - return second_char - 0x9F, charLen - - return -1, charLen - -class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, aBuf): - if not aBuf: - return -1, 1 - # find out current char's byte length - first_char = wrap_ord(aBuf[0]) - if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE): - charLen = 2 - elif first_char == 0x8F: - charLen = 3 - else: - charLen = 1 - - # return its order if it is hiragana - if len(aBuf) > 1: - second_char = wrap_ord(aBuf[1]) - if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): - return second_char - 0xA1, charLen - - return -1, charLen - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .compat import wrap_ord + +NUM_OF_CATEGORY = 6 +DONT_KNOW = -1 +ENOUGH_REL_THRESHOLD = 100 +MAX_REL_THRESHOLD = 1000 +MINIMUM_DATA_THRESHOLD = 4 + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), +(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), +(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), +(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), +(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), +(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), +(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), +(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), +(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), +(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), +(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), +(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), +(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), +(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), +(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), +(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), +(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), +(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), +(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), +(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), +(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), +(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), +(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), +(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), +(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), +(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), +(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), +(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), +(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), +(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +) + +class JapaneseContextAnalysis: + def __init__(self): + self.reset() + + def reset(self): + self._mTotalRel = 0 # total sequence received + # category counters, each interger counts sequence in its category + self._mRelSample = [0] * NUM_OF_CATEGORY + # if last byte in current buffer is not the last byte of a character, + # we need to know how many bytes to skip in next buffer + self._mNeedToSkipCharNum = 0 + self._mLastCharOrder = -1 # The order of previous char + # If this flag is set to True, detection is done and conclusion has + # been made + self._mDone = False + + def feed(self, aBuf, aLen): + if self._mDone: + return + + # The buffer we got is byte oriented, and a character may span in more than one + # buffers. In case the last one or two byte in last buffer is not + # complete, we record how many byte needed to complete that character + # and skip these bytes here. We can choose to record those bytes as + # well and analyse the character once it is complete, but since a + # character will not make much difference, by simply skipping + # this character will simply our logic and improve performance. + i = self._mNeedToSkipCharNum + while i < aLen: + order, charLen = self.get_order(aBuf[i:i + 2]) + i += charLen + if i > aLen: + self._mNeedToSkipCharNum = i - aLen + self._mLastCharOrder = -1 + else: + if (order != -1) and (self._mLastCharOrder != -1): + self._mTotalRel += 1 + if self._mTotalRel > MAX_REL_THRESHOLD: + self._mDone = True + break + self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1 + self._mLastCharOrder = order + + def got_enough_data(self): + return self._mTotalRel > ENOUGH_REL_THRESHOLD + + def get_confidence(self): + # This is just one way to calculate confidence. It works well for me. + if self._mTotalRel > MINIMUM_DATA_THRESHOLD: + return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel + else: + return DONT_KNOW + + def get_order(self, aBuf): + return -1, 1 + +class SJISContextAnalysis(JapaneseContextAnalysis): + def __init__(self): + self.charset_name = "SHIFT_JIS" + + def get_charset_name(self): + return self.charset_name + + def get_order(self, aBuf): + if not aBuf: + return -1, 1 + # find out current char's byte length + first_char = wrap_ord(aBuf[0]) + if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): + charLen = 2 + if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): + self.charset_name = "CP932" + else: + charLen = 1 + + # return its order if it is hiragana + if len(aBuf) > 1: + second_char = wrap_ord(aBuf[1]) + if (first_char == 202) and (0x9F <= second_char <= 0xF1): + return second_char - 0x9F, charLen + + return -1, charLen + +class EUCJPContextAnalysis(JapaneseContextAnalysis): + def get_order(self, aBuf): + if not aBuf: + return -1, 1 + # find out current char's byte length + first_char = wrap_ord(aBuf[0]) + if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE): + charLen = 2 + elif first_char == 0x8F: + charLen = 3 + else: + charLen = 1 + + # return its order if it is hiragana + if len(aBuf) > 1: + second_char = wrap_ord(aBuf[1]) + if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): + return second_char - 0xA1, charLen + + return -1, charLen + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/langbulgarianmodel.py b/botocore/vendored/requests/packages/chardet/langbulgarianmodel.py similarity index 98% rename from botocore/vendored/requests/packages/charade/langbulgarianmodel.py rename to botocore/vendored/requests/packages/chardet/langbulgarianmodel.py index ea5a60ba04..e5788fc64a 100644 --- a/botocore/vendored/requests/packages/charade/langbulgarianmodel.py +++ b/botocore/vendored/requests/packages/chardet/langbulgarianmodel.py @@ -1,229 +1,229 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Character Mapping Table: -# this table is modified base on win1251BulgarianCharToOrderMap, so -# only number <64 is sure valid - -Latin5_BulgarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 -110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 -253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 -116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 -194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80 -210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90 - 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0 - 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0 - 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0 - 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0 - 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0 - 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0 -) - -win1251BulgarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 -110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 -253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 -116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 -206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80 -221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90 - 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0 - 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0 - 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0 - 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0 - 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0 - 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0 -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 96.9392% -# first 1024 sequences:3.0618% -# rest sequences: 0.2992% -# negative sequences: 0.0020% -BulgarianLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, -3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, -0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, -0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, -0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, -0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, -0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, -2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, -3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, -1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, -3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, -1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, -2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, -2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, -3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, -1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, -2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, -2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, -3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, -1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, -2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, -2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, -2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, -1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, -2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, -1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, -3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, -1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, -3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, -1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, -2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, -1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, -2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, -1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, -2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, -1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, -1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, -1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, -2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, -1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, -2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, -1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, -0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, -1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, -1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, -1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, -0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, -0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, -0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, -1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, -0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, -0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, -1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, -1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, -1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -) - -Latin5BulgarianModel = { - 'charToOrderMap': Latin5_BulgarianCharToOrderMap, - 'precedenceMatrix': BulgarianLangModel, - 'mTypicalPositiveRatio': 0.969392, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-5" -} - -Win1251BulgarianModel = { - 'charToOrderMap': win1251BulgarianCharToOrderMap, - 'precedenceMatrix': BulgarianLangModel, - 'mTypicalPositiveRatio': 0.969392, - 'keepEnglishLetter': False, - 'charsetName': "windows-1251" -} - - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +# this table is modified base on win1251BulgarianCharToOrderMap, so +# only number <64 is sure valid + +Latin5_BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80 +210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90 + 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0 + 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0 +) + +win1251BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80 +221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90 + 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0 + 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 96.9392% +# first 1024 sequences:3.0618% +# rest sequences: 0.2992% +# negative sequences: 0.0020% +BulgarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, +3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, +0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, +0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, +0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, +0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, +0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, +2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, +3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, +1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, +3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, +1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, +2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, +2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, +3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, +1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, +2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, +2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, +1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, +2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, +2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, +2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, +1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, +2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, +1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, +3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, +1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, +3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, +1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, +2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, +1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, +2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, +1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, +2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, +1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, +1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, +0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, +1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, +1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, +0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, +1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +) + +Latin5BulgarianModel = { + 'charToOrderMap': Latin5_BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': False, + 'charsetName': "ISO-8859-5" +} + +Win1251BulgarianModel = { + 'charToOrderMap': win1251BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': False, + 'charsetName': "windows-1251" +} + + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/langcyrillicmodel.py b/botocore/vendored/requests/packages/chardet/langcyrillicmodel.py similarity index 98% rename from botocore/vendored/requests/packages/charade/langcyrillicmodel.py rename to botocore/vendored/requests/packages/chardet/langcyrillicmodel.py index 15e338fc11..a86f54bd54 100644 --- a/botocore/vendored/requests/packages/charade/langcyrillicmodel.py +++ b/botocore/vendored/requests/packages/chardet/langcyrillicmodel.py @@ -1,329 +1,329 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# KOI8-R language model -# Character Mapping Table: -KOI8R_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 -223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 -238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 - 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 - 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 - 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 - 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 -) - -win1251_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, -239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -) - -latin5_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, -) - -macCyrillic_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, -239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, -) - -IBM855_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, -206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, - 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, -220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, -230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, - 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, - 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, -250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, -) - -IBM866_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 97.6601% -# first 1024 sequences: 2.3389% -# rest sequences: 0.1237% -# negative sequences: 0.0009% -RussianLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, -3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, -0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, -0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, -1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, -1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, -2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, -1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, -3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, -1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, -2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, -1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, -1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, -1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, -2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, -1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, -3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, -1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, -2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, -1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, -2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, -1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, -1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, -1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, -3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, -3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, -1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, -1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, -0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, -2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, -1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, -1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, -0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, -1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, -2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, -2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, -1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, -1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, -2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, -1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, -0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, -2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, -1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, -1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, -0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, -0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, -0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, -0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, -0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, -1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, -0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, -2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, -0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, -) - -Koi8rModel = { - 'charToOrderMap': KOI8R_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "KOI8-R" -} - -Win1251CyrillicModel = { - 'charToOrderMap': win1251_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "windows-1251" -} - -Latin5CyrillicModel = { - 'charToOrderMap': latin5_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-5" -} - -MacCyrillicModel = { - 'charToOrderMap': macCyrillic_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "MacCyrillic" -}; - -Ibm866Model = { - 'charToOrderMap': IBM866_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "IBM866" -} - -Ibm855Model = { - 'charToOrderMap': IBM855_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "IBM855" -} - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# KOI8-R language model +# Character Mapping Table: +KOI8R_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 + 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 + 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 +) + +win1251_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, + 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, + 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, + 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest sequences: 0.1237% +# negative sequences: 0.0009% +RussianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, +1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, +1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, +1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, +0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { + 'charToOrderMap': KOI8R_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "KOI8-R" +} + +Win1251CyrillicModel = { + 'charToOrderMap': win1251_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "windows-1251" +} + +Latin5CyrillicModel = { + 'charToOrderMap': latin5_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "ISO-8859-5" +} + +MacCyrillicModel = { + 'charToOrderMap': macCyrillic_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "MacCyrillic" +}; + +Ibm866Model = { + 'charToOrderMap': IBM866_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "IBM866" +} + +Ibm855Model = { + 'charToOrderMap': IBM855_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "IBM855" +} + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/langgreekmodel.py b/botocore/vendored/requests/packages/chardet/langgreekmodel.py similarity index 98% rename from botocore/vendored/requests/packages/charade/langgreekmodel.py rename to botocore/vendored/requests/packages/chardet/langgreekmodel.py index 93241ce26b..ddb5837655 100644 --- a/botocore/vendored/requests/packages/charade/langgreekmodel.py +++ b/botocore/vendored/requests/packages/chardet/langgreekmodel.py @@ -1,225 +1,225 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Character Mapping Table: -Latin7_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 - 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 -253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 - 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 -253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 -253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 -110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 - 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 -124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 - 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 -) - -win1253_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 - 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 -253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 - 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 -253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 -253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 -110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 - 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 -124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 - 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 98.2851% -# first 1024 sequences:1.7001% -# rest sequences: 0.0359% -# negative sequences: 0.0148% -GreekLangModel = ( -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, -3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, -0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, -2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, -0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, -2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, -2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, -0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, -2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, -0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, -3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, -3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, -2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, -2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, -0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, -0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, -0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, -0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, -0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, -0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, -0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, -0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, -0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, -0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, -0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, -0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, -0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, -0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, -0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, -0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, -0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, -0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, -0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, -0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, -0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, -0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, -0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, -0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, -0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, -0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, -0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, -0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, -0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, -0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, -0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, -0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -) - -Latin7GreekModel = { - 'charToOrderMap': Latin7_CharToOrderMap, - 'precedenceMatrix': GreekLangModel, - 'mTypicalPositiveRatio': 0.982851, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-7" -} - -Win1253GreekModel = { - 'charToOrderMap': win1253_CharToOrderMap, - 'precedenceMatrix': GreekLangModel, - 'mTypicalPositiveRatio': 0.982851, - 'keepEnglishLetter': False, - 'charsetName': "windows-1253" -} - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +win1253_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest sequences: 0.0359% +# negative sequences: 0.0148% +GreekLangModel = ( +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin7GreekModel = { + 'charToOrderMap': Latin7_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': False, + 'charsetName': "ISO-8859-7" +} + +Win1253GreekModel = { + 'charToOrderMap': win1253_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': False, + 'charsetName': "windows-1253" +} + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/langhebrewmodel.py b/botocore/vendored/requests/packages/chardet/langhebrewmodel.py similarity index 98% rename from botocore/vendored/requests/packages/charade/langhebrewmodel.py rename to botocore/vendored/requests/packages/chardet/langhebrewmodel.py index d87132446d..75f2bc7fe7 100644 --- a/botocore/vendored/requests/packages/charade/langhebrewmodel.py +++ b/botocore/vendored/requests/packages/chardet/langhebrewmodel.py @@ -1,201 +1,201 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Simon Montagu -# Portions created by the Initial Developer are Copyright (C) 2005 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# Shoshannah Forbes - original C code (?) -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Windows-1255 language model -# Character Mapping Table: -win1255_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 - 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 -253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 - 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 -124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, -215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, - 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, -106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, - 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, -238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, - 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, - 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 98.4004% -# first 1024 sequences: 1.5981% -# rest sequences: 0.087% -# negative sequences: 0.0015% -HebrewLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, -3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, -1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, -1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, -1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, -1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, -0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, -0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, -0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, -0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, -0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, -0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, -0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, -0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, -0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, -0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, -0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, -0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, -1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, -0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, -0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, -0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, -0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, -0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, -2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, -0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, -0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, -1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, -0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, -2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, -1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, -2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, -2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, -0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, -0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, -) - -Win1255HebrewModel = { - 'charToOrderMap': win1255_CharToOrderMap, - 'precedenceMatrix': HebrewLangModel, - 'mTypicalPositiveRatio': 0.984004, - 'keepEnglishLetter': False, - 'charsetName': "windows-1255" -} - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Simon Montagu +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Shoshannah Forbes - original C code (?) +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Windows-1255 language model +# Character Mapping Table: +win1255_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 +253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, + 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, +106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, + 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, + 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, + 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.4004% +# first 1024 sequences: 1.5981% +# rest sequences: 0.087% +# negative sequences: 0.0015% +HebrewLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, +3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, +1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, +1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, +1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, +1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, +0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, +0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, +0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, +0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, +0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, +0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, +0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, +0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, +0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, +0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, +0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, +0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, +0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, +1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, +2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, +0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, +) + +Win1255HebrewModel = { + 'charToOrderMap': win1255_CharToOrderMap, + 'precedenceMatrix': HebrewLangModel, + 'mTypicalPositiveRatio': 0.984004, + 'keepEnglishLetter': False, + 'charsetName': "windows-1255" +} + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/langhungarianmodel.py b/botocore/vendored/requests/packages/chardet/langhungarianmodel.py similarity index 98% rename from botocore/vendored/requests/packages/charade/langhungarianmodel.py rename to botocore/vendored/requests/packages/chardet/langhungarianmodel.py index 6f59c61260..49d2f0fe75 100644 --- a/botocore/vendored/requests/packages/charade/langhungarianmodel.py +++ b/botocore/vendored/requests/packages/chardet/langhungarianmodel.py @@ -1,225 +1,225 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Character Mapping Table: -Latin2_HungarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, - 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, -253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, - 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, -159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, -175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, -191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, - 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, -221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, -232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, - 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, -245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, -) - -win1250HungarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, - 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, -253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, - 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, -161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, -177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, -191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, - 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, -221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, -232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, - 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, -245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 94.7368% -# first 1024 sequences:5.2623% -# rest sequences: 0.8894% -# negative sequences: 0.0009% -HungarianLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, -3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, -3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, -0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, -0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, -3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, -3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, -1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, -1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, -1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, -3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, -2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, -2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, -2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, -2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, -2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, -3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, -2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, -2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, -2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, -1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, -1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, -3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, -1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, -1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, -2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, -2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, -2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, -3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, -2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, -1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, -1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, -2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, -2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, -1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, -1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, -2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, -1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, -1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, -2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, -2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, -1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, -1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, -1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, -0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, -2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, -2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, -1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, -2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, -1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, -1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, -2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, -2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, -2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, -1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, -2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, -0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, -0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, -0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, -2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, -) - -Latin2HungarianModel = { - 'charToOrderMap': Latin2_HungarianCharToOrderMap, - 'precedenceMatrix': HungarianLangModel, - 'mTypicalPositiveRatio': 0.947368, - 'keepEnglishLetter': True, - 'charsetName': "ISO-8859-2" -} - -Win1250HungarianModel = { - 'charToOrderMap': win1250HungarianCharToOrderMap, - 'precedenceMatrix': HungarianLangModel, - 'mTypicalPositiveRatio': 0.947368, - 'keepEnglishLetter': True, - 'charsetName': "windows-1250" -} - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin2_HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +win1250HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 94.7368% +# first 1024 sequences:5.2623% +# rest sequences: 0.8894% +# negative sequences: 0.0009% +HungarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, +3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, +1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, +2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, +2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +) + +Latin2HungarianModel = { + 'charToOrderMap': Latin2_HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': True, + 'charsetName': "ISO-8859-2" +} + +Win1250HungarianModel = { + 'charToOrderMap': win1250HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': True, + 'charsetName': "windows-1250" +} + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/langthaimodel.py b/botocore/vendored/requests/packages/chardet/langthaimodel.py similarity index 98% rename from botocore/vendored/requests/packages/charade/langthaimodel.py rename to botocore/vendored/requests/packages/chardet/langthaimodel.py index df343a7473..0508b1b1ab 100644 --- a/botocore/vendored/requests/packages/charade/langthaimodel.py +++ b/botocore/vendored/requests/packages/chardet/langthaimodel.py @@ -1,200 +1,200 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# The following result for thai was collected from a limited sample (1M). - -# Character Mapping Table: -TIS620CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 -188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 -253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 - 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 -209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, -223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, -236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, - 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, - 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, - 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, - 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, - 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 92.6386% -# first 1024 sequences:7.3177% -# rest sequences: 1.0230% -# negative sequences: 0.0436% -ThaiLangModel = ( -0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, -0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, -3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, -0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, -3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, -3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, -3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, -3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, -3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, -3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, -3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, -2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, -3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, -0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, -0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, -3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, -1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, -3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, -3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, -1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, -0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, -2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, -0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, -3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, -2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, -3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, -0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, -3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, -3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, -2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, -3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, -2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, -3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, -3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, -3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, -3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, -1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, -0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, -0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, -3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, -3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, -1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, -3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, -3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, -0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, -0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, -1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, -1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, -3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, -0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, -3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, -0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, -0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, -0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, -0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, -0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, -0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, -0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, -3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, -0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, -0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, -3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, -2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, -0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, -3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, -1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, -1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, -1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -) - -TIS620ThaiModel = { - 'charToOrderMap': TIS620CharToOrderMap, - 'precedenceMatrix': ThaiLangModel, - 'mTypicalPositiveRatio': 0.926386, - 'keepEnglishLetter': False, - 'charsetName': "TIS-620" -} - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). + +# Character Mapping Table: +TIS620CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest sequences: 1.0230% +# negative sequences: 0.0436% +ThaiLangModel = ( +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, +0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { + 'charToOrderMap': TIS620CharToOrderMap, + 'precedenceMatrix': ThaiLangModel, + 'mTypicalPositiveRatio': 0.926386, + 'keepEnglishLetter': False, + 'charsetName': "TIS-620" +} + +# flake8: noqa diff --git a/botocore/vendored/requests/packages/charade/latin1prober.py b/botocore/vendored/requests/packages/chardet/latin1prober.py similarity index 94% rename from botocore/vendored/requests/packages/charade/latin1prober.py rename to botocore/vendored/requests/packages/chardet/latin1prober.py index 18eefd46a5..eef3573543 100644 --- a/botocore/vendored/requests/packages/charade/latin1prober.py +++ b/botocore/vendored/requests/packages/chardet/latin1prober.py @@ -1,139 +1,139 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetprober import CharSetProber -from .constants import eNotMe -from .compat import wrap_ord - -FREQ_CAT_NUM = 4 - -UDF = 0 # undefined -OTH = 1 # other -ASC = 2 # ascii capital letter -ASS = 3 # ascii small letter -ACV = 4 # accent capital vowel -ACO = 5 # accent capital other -ASV = 6 # accent small vowel -ASO = 7 # accent small other -CLASS_NUM = 8 # total classes - -Latin1_CharToClass = ( - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F - OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 - ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F - ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 - ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F - OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 - ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F - ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 - ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F - OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 - OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F - UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 - OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF - ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 - ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF - ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 - ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF - ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 - ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF - ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 - ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF -) - -# 0 : illegal -# 1 : very unlikely -# 2 : normal -# 3 : very likely -Latin1ClassModel = ( - # UDF OTH ASC ASS ACV ACO ASV ASO - 0, 0, 0, 0, 0, 0, 0, 0, # UDF - 0, 3, 3, 3, 3, 3, 3, 3, # OTH - 0, 3, 3, 3, 3, 3, 3, 3, # ASC - 0, 3, 3, 3, 1, 1, 3, 3, # ASS - 0, 3, 3, 3, 1, 2, 1, 2, # ACV - 0, 3, 3, 3, 3, 3, 3, 3, # ACO - 0, 3, 1, 3, 1, 1, 1, 3, # ASV - 0, 3, 1, 3, 1, 1, 3, 3, # ASO -) - - -class Latin1Prober(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self.reset() - - def reset(self): - self._mLastCharClass = OTH - self._mFreqCounter = [0] * FREQ_CAT_NUM - CharSetProber.reset(self) - - def get_charset_name(self): - return "windows-1252" - - def feed(self, aBuf): - aBuf = self.filter_with_english_letters(aBuf) - for c in aBuf: - charClass = Latin1_CharToClass[wrap_ord(c)] - freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) - + charClass] - if freq == 0: - self._mState = eNotMe - break - self._mFreqCounter[freq] += 1 - self._mLastCharClass = charClass - - return self.get_state() - - def get_confidence(self): - if self.get_state() == eNotMe: - return 0.01 - - total = sum(self._mFreqCounter) - if total < 0.01: - confidence = 0.0 - else: - confidence = ((float(self._mFreqCounter[3]) / total) - - (self._mFreqCounter[1] * 20.0 / total)) - if confidence < 0.0: - confidence = 0.0 - # lower the confidence of latin1 so that other more accurate - # detector can take priority. - confidence = confidence * 0.5 - return confidence +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .constants import eNotMe +from .compat import wrap_ord + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +CLASS_NUM = 8 # total classes + +Latin1_CharToClass = ( + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 + OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F + UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 + OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF + ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF + ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 + ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF + ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 + ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +Latin1ClassModel = ( + # UDF OTH ASC ASS ACV ACO ASV ASO + 0, 0, 0, 0, 0, 0, 0, 0, # UDF + 0, 3, 3, 3, 3, 3, 3, 3, # OTH + 0, 3, 3, 3, 3, 3, 3, 3, # ASC + 0, 3, 3, 3, 1, 1, 3, 3, # ASS + 0, 3, 3, 3, 1, 2, 1, 2, # ACV + 0, 3, 3, 3, 3, 3, 3, 3, # ACO + 0, 3, 1, 3, 1, 1, 1, 3, # ASV + 0, 3, 1, 3, 1, 1, 3, 3, # ASO +) + + +class Latin1Prober(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self.reset() + + def reset(self): + self._mLastCharClass = OTH + self._mFreqCounter = [0] * FREQ_CAT_NUM + CharSetProber.reset(self) + + def get_charset_name(self): + return "windows-1252" + + def feed(self, aBuf): + aBuf = self.filter_with_english_letters(aBuf) + for c in aBuf: + charClass = Latin1_CharToClass[wrap_ord(c)] + freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + + charClass] + if freq == 0: + self._mState = eNotMe + break + self._mFreqCounter[freq] += 1 + self._mLastCharClass = charClass + + return self.get_state() + + def get_confidence(self): + if self.get_state() == eNotMe: + return 0.01 + + total = sum(self._mFreqCounter) + if total < 0.01: + confidence = 0.0 + else: + confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) + / total) + if confidence < 0.0: + confidence = 0.0 + # lower the confidence of latin1 so that other more accurate + # detector can take priority. + confidence = confidence * 0.73 + return confidence diff --git a/botocore/vendored/requests/packages/charade/mbcharsetprober.py b/botocore/vendored/requests/packages/chardet/mbcharsetprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/mbcharsetprober.py rename to botocore/vendored/requests/packages/chardet/mbcharsetprober.py index 1eee253c04..bb42f2fb5e 100644 --- a/botocore/vendored/requests/packages/charade/mbcharsetprober.py +++ b/botocore/vendored/requests/packages/chardet/mbcharsetprober.py @@ -1,86 +1,86 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# Proofpoint, Inc. -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from . import constants -from .charsetprober import CharSetProber - - -class MultiByteCharSetProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mDistributionAnalyzer = None - self._mCodingSM = None - self._mLastChar = [0, 0] - - def reset(self): - CharSetProber.reset(self) - if self._mCodingSM: - self._mCodingSM.reset() - if self._mDistributionAnalyzer: - self._mDistributionAnalyzer.reset() - self._mLastChar = [0, 0] - - def get_charset_name(self): - pass - - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() - if i == 0: - self._mLastChar[1] = aBuf[0] - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) - else: - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) - - self._mLastChar[0] = aBuf[aLen - 1] - - if self.get_state() == constants.eDetecting: - if (self._mDistributionAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - return self._mDistributionAnalyzer.get_confidence() +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from . import constants +from .charsetprober import CharSetProber + + +class MultiByteCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mDistributionAnalyzer = None + self._mCodingSM = None + self._mLastChar = [0, 0] + + def reset(self): + CharSetProber.reset(self) + if self._mCodingSM: + self._mCodingSM.reset() + if self._mDistributionAnalyzer: + self._mDistributionAnalyzer.reset() + self._mLastChar = [0, 0] + + def get_charset_name(self): + pass + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == constants.eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + + ' prober hit error at byte ' + str(i) + + '\n') + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], + charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if (self._mDistributionAnalyzer.got_enough_data() and + (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + return self._mDistributionAnalyzer.get_confidence() diff --git a/botocore/vendored/requests/packages/charade/mbcsgroupprober.py b/botocore/vendored/requests/packages/chardet/mbcsgroupprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/mbcsgroupprober.py rename to botocore/vendored/requests/packages/chardet/mbcsgroupprober.py index 2f6f5e897f..03c9dcf3eb 100644 --- a/botocore/vendored/requests/packages/charade/mbcsgroupprober.py +++ b/botocore/vendored/requests/packages/chardet/mbcsgroupprober.py @@ -1,54 +1,54 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# Proofpoint, Inc. -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetgroupprober import CharSetGroupProber -from .utf8prober import UTF8Prober -from .sjisprober import SJISProber -from .eucjpprober import EUCJPProber -from .gb2312prober import GB2312Prober -from .euckrprober import EUCKRProber -from .cp949prober import CP949Prober -from .big5prober import Big5Prober -from .euctwprober import EUCTWProber - - -class MBCSGroupProber(CharSetGroupProber): - def __init__(self): - CharSetGroupProber.__init__(self) - self._mProbers = [ - UTF8Prober(), - SJISProber(), - EUCJPProber(), - GB2312Prober(), - EUCKRProber(), - CP949Prober(), - Big5Prober(), - EUCTWProber() - ] - self.reset() +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .utf8prober import UTF8Prober +from .sjisprober import SJISProber +from .eucjpprober import EUCJPProber +from .gb2312prober import GB2312Prober +from .euckrprober import EUCKRProber +from .cp949prober import CP949Prober +from .big5prober import Big5Prober +from .euctwprober import EUCTWProber + + +class MBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ + UTF8Prober(), + SJISProber(), + EUCJPProber(), + GB2312Prober(), + EUCKRProber(), + CP949Prober(), + Big5Prober(), + EUCTWProber() + ] + self.reset() diff --git a/botocore/vendored/requests/packages/charade/mbcssm.py b/botocore/vendored/requests/packages/chardet/mbcssm.py similarity index 96% rename from botocore/vendored/requests/packages/charade/mbcssm.py rename to botocore/vendored/requests/packages/chardet/mbcssm.py index 55c02f0a06..efe678ca03 100644 --- a/botocore/vendored/requests/packages/charade/mbcssm.py +++ b/botocore/vendored/requests/packages/chardet/mbcssm.py @@ -1,575 +1,572 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .constants import eStart, eError, eItsMe - -# BIG5 - -BIG5_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 4,4,4,4,4,4,4,4, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 4,3,3,3,3,3,3,3, # a0 - a7 - 3,3,3,3,3,3,3,3, # a8 - af - 3,3,3,3,3,3,3,3, # b0 - b7 - 3,3,3,3,3,3,3,3, # b8 - bf - 3,3,3,3,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff -) - -BIG5_st = ( - eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 - eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f - eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17 -) - -Big5CharLenTable = (0, 1, 1, 2, 0) - -Big5SMModel = {'classTable': BIG5_cls, - 'classFactor': 5, - 'stateTable': BIG5_st, - 'charLenTable': Big5CharLenTable, - 'name': 'Big5'} - -# CP949 - -CP949_cls = ( - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f - 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f - 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f - 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f - 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f - 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f - 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f - 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f - 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af - 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf - 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff -) - -CP949_st = ( -#cls= 0 1 2 3 4 5 6 7 8 9 # previous state = - eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart - eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe - eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3 - eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4 - eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5 - eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6 -) - -CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) - -CP949SMModel = {'classTable': CP949_cls, - 'classFactor': 10, - 'stateTable': CP949_st, - 'charLenTable': CP949CharLenTable, - 'name': 'CP949'} - -# EUC-JP - -EUCJP_cls = ( - 4,4,4,4,4,4,4,4, # 00 - 07 - 4,4,4,4,4,4,5,5, # 08 - 0f - 4,4,4,4,4,4,4,4, # 10 - 17 - 4,4,4,5,4,4,4,4, # 18 - 1f - 4,4,4,4,4,4,4,4, # 20 - 27 - 4,4,4,4,4,4,4,4, # 28 - 2f - 4,4,4,4,4,4,4,4, # 30 - 37 - 4,4,4,4,4,4,4,4, # 38 - 3f - 4,4,4,4,4,4,4,4, # 40 - 47 - 4,4,4,4,4,4,4,4, # 48 - 4f - 4,4,4,4,4,4,4,4, # 50 - 57 - 4,4,4,4,4,4,4,4, # 58 - 5f - 4,4,4,4,4,4,4,4, # 60 - 67 - 4,4,4,4,4,4,4,4, # 68 - 6f - 4,4,4,4,4,4,4,4, # 70 - 77 - 4,4,4,4,4,4,4,4, # 78 - 7f - 5,5,5,5,5,5,5,5, # 80 - 87 - 5,5,5,5,5,5,1,3, # 88 - 8f - 5,5,5,5,5,5,5,5, # 90 - 97 - 5,5,5,5,5,5,5,5, # 98 - 9f - 5,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,0,5 # f8 - ff -) - -EUCJP_st = ( - 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 - eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f - 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27 -) - -EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) - -EUCJPSMModel = {'classTable': EUCJP_cls, - 'classFactor': 6, - 'stateTable': EUCJP_st, - 'charLenTable': EUCJPCharLenTable, - 'name': 'EUC-JP'} - -# EUC-KR - -EUCKR_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,3,3,3, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,3,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 2,2,2,2,2,2,2,2, # e0 - e7 - 2,2,2,2,2,2,2,2, # e8 - ef - 2,2,2,2,2,2,2,2, # f0 - f7 - 2,2,2,2,2,2,2,0 # f8 - ff -) - -EUCKR_st = ( - eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 - eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f -) - -EUCKRCharLenTable = (0, 1, 2, 0) - -EUCKRSMModel = {'classTable': EUCKR_cls, - 'classFactor': 4, - 'stateTable': EUCKR_st, - 'charLenTable': EUCKRCharLenTable, - 'name': 'EUC-KR'} - -# EUC-TW - -EUCTW_cls = ( - 2,2,2,2,2,2,2,2, # 00 - 07 - 2,2,2,2,2,2,0,0, # 08 - 0f - 2,2,2,2,2,2,2,2, # 10 - 17 - 2,2,2,0,2,2,2,2, # 18 - 1f - 2,2,2,2,2,2,2,2, # 20 - 27 - 2,2,2,2,2,2,2,2, # 28 - 2f - 2,2,2,2,2,2,2,2, # 30 - 37 - 2,2,2,2,2,2,2,2, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,2, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,6,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,3,4,4,4,4,4,4, # a0 - a7 - 5,5,1,1,1,1,1,1, # a8 - af - 1,1,1,1,1,1,1,1, # b0 - b7 - 1,1,1,1,1,1,1,1, # b8 - bf - 1,1,3,1,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff -) - -EUCTW_st = ( - eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 - eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 - eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f - 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 - eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f -) - -EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) - -EUCTWSMModel = {'classTable': EUCTW_cls, - 'classFactor': 7, - 'stateTable': EUCTW_st, - 'charLenTable': EUCTWCharLenTable, - 'name': 'x-euc-tw'} - -# GB2312 - -GB2312_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 3,3,3,3,3,3,3,3, # 30 - 37 - 3,3,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,4, # 78 - 7f - 5,6,6,6,6,6,6,6, # 80 - 87 - 6,6,6,6,6,6,6,6, # 88 - 8f - 6,6,6,6,6,6,6,6, # 90 - 97 - 6,6,6,6,6,6,6,6, # 98 - 9f - 6,6,6,6,6,6,6,6, # a0 - a7 - 6,6,6,6,6,6,6,6, # a8 - af - 6,6,6,6,6,6,6,6, # b0 - b7 - 6,6,6,6,6,6,6,6, # b8 - bf - 6,6,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 6,6,6,6,6,6,6,6, # e0 - e7 - 6,6,6,6,6,6,6,6, # e8 - ef - 6,6,6,6,6,6,6,6, # f0 - f7 - 6,6,6,6,6,6,6,0 # f8 - ff -) - -GB2312_st = ( - eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 - eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 - 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f - eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27 - eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f -) - -# To be accurate, the length of class 6 can be either 2 or 4. -# But it is not necessary to discriminate between the two since -# it is used for frequency analysis only, and we are validing -# each code range there as well. So it is safe to set it to be -# 2 here. -GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) - -GB2312SMModel = {'classTable': GB2312_cls, - 'classFactor': 7, - 'stateTable': GB2312_st, - 'charLenTable': GB2312CharLenTable, - 'name': 'GB2312'} - -# Shift_JIS - -SJIS_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,3,3,3, # 80 - 87 - 3,3,3,3,3,3,3,3, # 88 - 8f - 3,3,3,3,3,3,3,3, # 90 - 97 - 3,3,3,3,3,3,3,3, # 98 - 9f - #0xa0 is illegal in sjis encoding, but some pages does - #contain such byte. We need to be more error forgiven. - 2,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,4,4,4, # e8 - ef - 4,4,4,4,4,4,4,4, # f0 - f7 - 4,4,4,4,4,0,0,0 # f8 - ff -) - - -SJIS_st = ( - eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17 -) - -SJISCharLenTable = (0, 1, 1, 2, 0, 0) - -SJISSMModel = {'classTable': SJIS_cls, - 'classFactor': 6, - 'stateTable': SJIS_st, - 'charLenTable': SJISCharLenTable, - 'name': 'Shift_JIS'} - -# UCS2-BE - -UCS2BE_cls = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff -) - -UCS2BE_st = ( - 5, 7, 7,eError, 4, 3,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17 - 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f - 6, 6, 6, 6, 5, 7, 7,eError,#20-27 - 5, 8, 6, 6,eError, 6, 6, 6,#28-2f - 6, 6, 6, 6,eError,eError,eStart,eStart #30-37 -) - -UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) - -UCS2BESMModel = {'classTable': UCS2BE_cls, - 'classFactor': 6, - 'stateTable': UCS2BE_st, - 'charLenTable': UCS2BECharLenTable, - 'name': 'UTF-16BE'} - -# UCS2-LE - -UCS2LE_cls = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff -) - -UCS2LE_st = ( - 6, 6, 7, 6, 4, 3,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 - 5, 5, 5,eError, 5,eError, 6, 6,#18-1f - 7, 6, 8, 8, 5, 5, 5,eError,#20-27 - 5, 5, 5,eError,eError,eError, 5, 5,#28-2f - 5, 5, 5,eError, 5,eError,eStart,eStart #30-37 -) - -UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) - -UCS2LESMModel = {'classTable': UCS2LE_cls, - 'classFactor': 6, - 'stateTable': UCS2LE_st, - 'charLenTable': UCS2LECharLenTable, - 'name': 'UTF-16LE'} - -# UTF-8 - -UTF8_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 2,2,2,2,3,3,3,3, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 5,5,5,5,5,5,5,5, # a0 - a7 - 5,5,5,5,5,5,5,5, # a8 - af - 5,5,5,5,5,5,5,5, # b0 - b7 - 5,5,5,5,5,5,5,5, # b8 - bf - 0,0,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 7,8,8,8,8,8,8,8, # e0 - e7 - 8,8,8,8,8,9,8,8, # e8 - ef - 10,11,11,11,11,11,11,11, # f0 - f7 - 12,13,13,13,14,15,0,0 # f8 - ff -) - -UTF8_st = ( - eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 - 9, 11, 8, 7, 6, 5, 4, 3,#08-0f - eError,eError,eError,eError,eError,eError,eError,eError,#10-17 - eError,eError,eError,eError,eError,eError,eError,eError,#18-1f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f - eError,eError, 5, 5, 5, 5,eError,eError,#30-37 - eError,eError,eError,eError,eError,eError,eError,eError,#38-3f - eError,eError,eError, 5, 5, 5,eError,eError,#40-47 - eError,eError,eError,eError,eError,eError,eError,eError,#48-4f - eError,eError, 7, 7, 7, 7,eError,eError,#50-57 - eError,eError,eError,eError,eError,eError,eError,eError,#58-5f - eError,eError,eError,eError, 7, 7,eError,eError,#60-67 - eError,eError,eError,eError,eError,eError,eError,eError,#68-6f - eError,eError, 9, 9, 9, 9,eError,eError,#70-77 - eError,eError,eError,eError,eError,eError,eError,eError,#78-7f - eError,eError,eError,eError,eError, 9,eError,eError,#80-87 - eError,eError,eError,eError,eError,eError,eError,eError,#88-8f - eError,eError, 12, 12, 12, 12,eError,eError,#90-97 - eError,eError,eError,eError,eError,eError,eError,eError,#98-9f - eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 - eError,eError,eError,eError,eError,eError,eError,eError,#a8-af - eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 - eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf - eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 - eError,eError,eError,eError,eError,eError,eError,eError #c8-cf -) - -UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) - -UTF8SMModel = {'classTable': UTF8_cls, - 'classFactor': 16, - 'stateTable': UTF8_st, - 'charLenTable': UTF8CharLenTable, - 'name': 'UTF-8'} - -# flake8: noqa +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart, eError, eItsMe + +# BIG5 + +BIG5_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 4,4,4,4,4,4,4,4, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 4,3,3,3,3,3,3,3, # a0 - a7 + 3,3,3,3,3,3,3,3, # a8 - af + 3,3,3,3,3,3,3,3, # b0 - b7 + 3,3,3,3,3,3,3,3, # b8 - bf + 3,3,3,3,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0 # f8 - ff +) + +BIG5_st = ( + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f + eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17 +) + +Big5CharLenTable = (0, 1, 1, 2, 0) + +Big5SMModel = {'classTable': BIG5_cls, + 'classFactor': 5, + 'stateTable': BIG5_st, + 'charLenTable': Big5CharLenTable, + 'name': 'Big5'} + +# CP949 + +CP949_cls = ( + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f + 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f + 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f + 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f + 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f + 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f + 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f + 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f + 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af + 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf + 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff +) + +CP949_st = ( +#cls= 0 1 2 3 4 5 6 7 8 9 # previous state = + eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart + eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe + eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3 + eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4 + eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5 + eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6 +) + +CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) + +CP949SMModel = {'classTable': CP949_cls, + 'classFactor': 10, + 'stateTable': CP949_st, + 'charLenTable': CP949CharLenTable, + 'name': 'CP949'} + +# EUC-JP + +EUCJP_cls = ( + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,5,5, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,5,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,4,4,4,4,4,4,4, # 30 - 37 + 4,4,4,4,4,4,4,4, # 38 - 3f + 4,4,4,4,4,4,4,4, # 40 - 47 + 4,4,4,4,4,4,4,4, # 48 - 4f + 4,4,4,4,4,4,4,4, # 50 - 57 + 4,4,4,4,4,4,4,4, # 58 - 5f + 4,4,4,4,4,4,4,4, # 60 - 67 + 4,4,4,4,4,4,4,4, # 68 - 6f + 4,4,4,4,4,4,4,4, # 70 - 77 + 4,4,4,4,4,4,4,4, # 78 - 7f + 5,5,5,5,5,5,5,5, # 80 - 87 + 5,5,5,5,5,5,1,3, # 88 - 8f + 5,5,5,5,5,5,5,5, # 90 - 97 + 5,5,5,5,5,5,5,5, # 98 - 9f + 5,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,0,5 # f8 - ff +) + +EUCJP_st = ( + 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 + eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f + 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27 +) + +EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) + +EUCJPSMModel = {'classTable': EUCJP_cls, + 'classFactor': 6, + 'stateTable': EUCJP_st, + 'charLenTable': EUCJPCharLenTable, + 'name': 'EUC-JP'} + +# EUC-KR + +EUCKR_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,3,3,3, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,3,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 2,2,2,2,2,2,2,2, # e0 - e7 + 2,2,2,2,2,2,2,2, # e8 - ef + 2,2,2,2,2,2,2,2, # f0 - f7 + 2,2,2,2,2,2,2,0 # f8 - ff +) + +EUCKR_st = ( + eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 + eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f +) + +EUCKRCharLenTable = (0, 1, 2, 0) + +EUCKRSMModel = {'classTable': EUCKR_cls, + 'classFactor': 4, + 'stateTable': EUCKR_st, + 'charLenTable': EUCKRCharLenTable, + 'name': 'EUC-KR'} + +# EUC-TW + +EUCTW_cls = ( + 2,2,2,2,2,2,2,2, # 00 - 07 + 2,2,2,2,2,2,0,0, # 08 - 0f + 2,2,2,2,2,2,2,2, # 10 - 17 + 2,2,2,0,2,2,2,2, # 18 - 1f + 2,2,2,2,2,2,2,2, # 20 - 27 + 2,2,2,2,2,2,2,2, # 28 - 2f + 2,2,2,2,2,2,2,2, # 30 - 37 + 2,2,2,2,2,2,2,2, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,2, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,6,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,3,4,4,4,4,4,4, # a0 - a7 + 5,5,1,1,1,1,1,1, # a8 - af + 1,1,1,1,1,1,1,1, # b0 - b7 + 1,1,1,1,1,1,1,1, # b8 - bf + 1,1,3,1,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0 # f8 - ff +) + +EUCTW_st = ( + eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 + eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f + 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 + eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +) + +EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) + +EUCTWSMModel = {'classTable': EUCTW_cls, + 'classFactor': 7, + 'stateTable': EUCTW_st, + 'charLenTable': EUCTWCharLenTable, + 'name': 'x-euc-tw'} + +# GB2312 + +GB2312_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 3,3,3,3,3,3,3,3, # 30 - 37 + 3,3,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,4, # 78 - 7f + 5,6,6,6,6,6,6,6, # 80 - 87 + 6,6,6,6,6,6,6,6, # 88 - 8f + 6,6,6,6,6,6,6,6, # 90 - 97 + 6,6,6,6,6,6,6,6, # 98 - 9f + 6,6,6,6,6,6,6,6, # a0 - a7 + 6,6,6,6,6,6,6,6, # a8 - af + 6,6,6,6,6,6,6,6, # b0 - b7 + 6,6,6,6,6,6,6,6, # b8 - bf + 6,6,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 6,6,6,6,6,6,6,6, # e0 - e7 + 6,6,6,6,6,6,6,6, # e8 - ef + 6,6,6,6,6,6,6,6, # f0 - f7 + 6,6,6,6,6,6,6,0 # f8 - ff +) + +GB2312_st = ( + eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 + 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f + eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27 + eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +) + +# To be accurate, the length of class 6 can be either 2 or 4. +# But it is not necessary to discriminate between the two since +# it is used for frequency analysis only, and we are validing +# each code range there as well. So it is safe to set it to be +# 2 here. +GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) + +GB2312SMModel = {'classTable': GB2312_cls, + 'classFactor': 7, + 'stateTable': GB2312_st, + 'charLenTable': GB2312CharLenTable, + 'name': 'GB2312'} + +# Shift_JIS + +SJIS_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 3,3,3,3,3,2,2,3, # 80 - 87 + 3,3,3,3,3,3,3,3, # 88 - 8f + 3,3,3,3,3,3,3,3, # 90 - 97 + 3,3,3,3,3,3,3,3, # 98 - 9f + #0xa0 is illegal in sjis encoding, but some pages does + #contain such byte. We need to be more error forgiven. + 2,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,4,4,4, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,0,0,0) # f8 - ff + + +SJIS_st = ( + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17 +) + +SJISCharLenTable = (0, 1, 1, 2, 0, 0) + +SJISSMModel = {'classTable': SJIS_cls, + 'classFactor': 6, + 'stateTable': SJIS_st, + 'charLenTable': SJISCharLenTable, + 'name': 'Shift_JIS'} + +# UCS2-BE + +UCS2BE_cls = ( + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5 # f8 - ff +) + +UCS2BE_st = ( + 5, 7, 7,eError, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17 + 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f + 6, 6, 6, 6, 5, 7, 7,eError,#20-27 + 5, 8, 6, 6,eError, 6, 6, 6,#28-2f + 6, 6, 6, 6,eError,eError,eStart,eStart #30-37 +) + +UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) + +UCS2BESMModel = {'classTable': UCS2BE_cls, + 'classFactor': 6, + 'stateTable': UCS2BE_st, + 'charLenTable': UCS2BECharLenTable, + 'name': 'UTF-16BE'} + +# UCS2-LE + +UCS2LE_cls = ( + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5 # f8 - ff +) + +UCS2LE_st = ( + 6, 6, 7, 6, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 + 5, 5, 5,eError, 5,eError, 6, 6,#18-1f + 7, 6, 8, 8, 5, 5, 5,eError,#20-27 + 5, 5, 5,eError,eError,eError, 5, 5,#28-2f + 5, 5, 5,eError, 5,eError,eStart,eStart #30-37 +) + +UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) + +UCS2LESMModel = {'classTable': UCS2LE_cls, + 'classFactor': 6, + 'stateTable': UCS2LE_st, + 'charLenTable': UCS2LECharLenTable, + 'name': 'UTF-16LE'} + +# UTF-8 + +UTF8_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 2,2,2,2,3,3,3,3, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 5,5,5,5,5,5,5,5, # a0 - a7 + 5,5,5,5,5,5,5,5, # a8 - af + 5,5,5,5,5,5,5,5, # b0 - b7 + 5,5,5,5,5,5,5,5, # b8 - bf + 0,0,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 7,8,8,8,8,8,8,8, # e0 - e7 + 8,8,8,8,8,9,8,8, # e8 - ef + 10,11,11,11,11,11,11,11, # f0 - f7 + 12,13,13,13,14,15,0,0 # f8 - ff +) + +UTF8_st = ( + eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 + 9, 11, 8, 7, 6, 5, 4, 3,#08-0f + eError,eError,eError,eError,eError,eError,eError,eError,#10-17 + eError,eError,eError,eError,eError,eError,eError,eError,#18-1f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f + eError,eError, 5, 5, 5, 5,eError,eError,#30-37 + eError,eError,eError,eError,eError,eError,eError,eError,#38-3f + eError,eError,eError, 5, 5, 5,eError,eError,#40-47 + eError,eError,eError,eError,eError,eError,eError,eError,#48-4f + eError,eError, 7, 7, 7, 7,eError,eError,#50-57 + eError,eError,eError,eError,eError,eError,eError,eError,#58-5f + eError,eError,eError,eError, 7, 7,eError,eError,#60-67 + eError,eError,eError,eError,eError,eError,eError,eError,#68-6f + eError,eError, 9, 9, 9, 9,eError,eError,#70-77 + eError,eError,eError,eError,eError,eError,eError,eError,#78-7f + eError,eError,eError,eError,eError, 9,eError,eError,#80-87 + eError,eError,eError,eError,eError,eError,eError,eError,#88-8f + eError,eError, 12, 12, 12, 12,eError,eError,#90-97 + eError,eError,eError,eError,eError,eError,eError,eError,#98-9f + eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 + eError,eError,eError,eError,eError,eError,eError,eError,#a8-af + eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 + eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf + eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 + eError,eError,eError,eError,eError,eError,eError,eError #c8-cf +) + +UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) + +UTF8SMModel = {'classTable': UTF8_cls, + 'classFactor': 16, + 'stateTable': UTF8_st, + 'charLenTable': UTF8CharLenTable, + 'name': 'UTF-8'} diff --git a/botocore/vendored/requests/packages/charade/sbcharsetprober.py b/botocore/vendored/requests/packages/chardet/sbcharsetprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/sbcharsetprober.py rename to botocore/vendored/requests/packages/chardet/sbcharsetprober.py index da26715cfc..37291bd27a 100644 --- a/botocore/vendored/requests/packages/charade/sbcharsetprober.py +++ b/botocore/vendored/requests/packages/chardet/sbcharsetprober.py @@ -1,120 +1,120 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from . import constants -from .charsetprober import CharSetProber -from .compat import wrap_ord - -SAMPLE_SIZE = 64 -SB_ENOUGH_REL_THRESHOLD = 1024 -POSITIVE_SHORTCUT_THRESHOLD = 0.95 -NEGATIVE_SHORTCUT_THRESHOLD = 0.05 -SYMBOL_CAT_ORDER = 250 -NUMBER_OF_SEQ_CAT = 4 -POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 -#NEGATIVE_CAT = 0 - - -class SingleByteCharSetProber(CharSetProber): - def __init__(self, model, reversed=False, nameProber=None): - CharSetProber.__init__(self) - self._mModel = model - # TRUE if we need to reverse every pair in the model lookup - self._mReversed = reversed - # Optional auxiliary prober for name decision - self._mNameProber = nameProber - self.reset() - - def reset(self): - CharSetProber.reset(self) - # char order of last character - self._mLastOrder = 255 - self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT - self._mTotalSeqs = 0 - self._mTotalChar = 0 - # characters that fall in our sampling range - self._mFreqChar = 0 - - def get_charset_name(self): - if self._mNameProber: - return self._mNameProber.get_charset_name() - else: - return self._mModel['charsetName'] - - def feed(self, aBuf): - if not self._mModel['keepEnglishLetter']: - aBuf = self.filter_without_english_letters(aBuf) - aLen = len(aBuf) - if not aLen: - return self.get_state() - for c in aBuf: - order = self._mModel['charToOrderMap'][wrap_ord(c)] - if order < SYMBOL_CAT_ORDER: - self._mTotalChar += 1 - if order < SAMPLE_SIZE: - self._mFreqChar += 1 - if self._mLastOrder < SAMPLE_SIZE: - self._mTotalSeqs += 1 - if not self._mReversed: - i = (self._mLastOrder * SAMPLE_SIZE) + order - model = self._mModel['precedenceMatrix'][i] - else: # reverse the order of the letters in the lookup - i = (order * SAMPLE_SIZE) + self._mLastOrder - model = self._mModel['precedenceMatrix'][i] - self._mSeqCounters[model] += 1 - self._mLastOrder = order - - if self.get_state() == constants.eDetecting: - if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: - cf = self.get_confidence() - if cf > POSITIVE_SHORTCUT_THRESHOLD: - if constants._debug: - sys.stderr.write('%s confidence = %s, we have a' - 'winner\n' % - (self._mModel['charsetName'], cf)) - self._mState = constants.eFoundIt - elif cf < NEGATIVE_SHORTCUT_THRESHOLD: - if constants._debug: - sys.stderr.write('%s confidence = %s, below negative' - 'shortcut threshhold %s\n' % - (self._mModel['charsetName'], cf, - NEGATIVE_SHORTCUT_THRESHOLD)) - self._mState = constants.eNotMe - - return self.get_state() - - def get_confidence(self): - r = 0.01 - if self._mTotalSeqs > 0: - r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs - / self._mModel['mTypicalPositiveRatio']) - r = r * self._mFreqChar / self._mTotalChar - if r >= 1.0: - r = 0.99 - return r +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from . import constants +from .charsetprober import CharSetProber +from .compat import wrap_ord + +SAMPLE_SIZE = 64 +SB_ENOUGH_REL_THRESHOLD = 1024 +POSITIVE_SHORTCUT_THRESHOLD = 0.95 +NEGATIVE_SHORTCUT_THRESHOLD = 0.05 +SYMBOL_CAT_ORDER = 250 +NUMBER_OF_SEQ_CAT = 4 +POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 +#NEGATIVE_CAT = 0 + + +class SingleByteCharSetProber(CharSetProber): + def __init__(self, model, reversed=False, nameProber=None): + CharSetProber.__init__(self) + self._mModel = model + # TRUE if we need to reverse every pair in the model lookup + self._mReversed = reversed + # Optional auxiliary prober for name decision + self._mNameProber = nameProber + self.reset() + + def reset(self): + CharSetProber.reset(self) + # char order of last character + self._mLastOrder = 255 + self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT + self._mTotalSeqs = 0 + self._mTotalChar = 0 + # characters that fall in our sampling range + self._mFreqChar = 0 + + def get_charset_name(self): + if self._mNameProber: + return self._mNameProber.get_charset_name() + else: + return self._mModel['charsetName'] + + def feed(self, aBuf): + if not self._mModel['keepEnglishLetter']: + aBuf = self.filter_without_english_letters(aBuf) + aLen = len(aBuf) + if not aLen: + return self.get_state() + for c in aBuf: + order = self._mModel['charToOrderMap'][wrap_ord(c)] + if order < SYMBOL_CAT_ORDER: + self._mTotalChar += 1 + if order < SAMPLE_SIZE: + self._mFreqChar += 1 + if self._mLastOrder < SAMPLE_SIZE: + self._mTotalSeqs += 1 + if not self._mReversed: + i = (self._mLastOrder * SAMPLE_SIZE) + order + model = self._mModel['precedenceMatrix'][i] + else: # reverse the order of the letters in the lookup + i = (order * SAMPLE_SIZE) + self._mLastOrder + model = self._mModel['precedenceMatrix'][i] + self._mSeqCounters[model] += 1 + self._mLastOrder = order + + if self.get_state() == constants.eDetecting: + if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: + cf = self.get_confidence() + if cf > POSITIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, we have a' + 'winner\n' % + (self._mModel['charsetName'], cf)) + self._mState = constants.eFoundIt + elif cf < NEGATIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, below negative' + 'shortcut threshhold %s\n' % + (self._mModel['charsetName'], cf, + NEGATIVE_SHORTCUT_THRESHOLD)) + self._mState = constants.eNotMe + + return self.get_state() + + def get_confidence(self): + r = 0.01 + if self._mTotalSeqs > 0: + r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs + / self._mModel['mTypicalPositiveRatio']) + r = r * self._mFreqChar / self._mTotalChar + if r >= 1.0: + r = 0.99 + return r diff --git a/botocore/vendored/requests/packages/charade/sbcsgroupprober.py b/botocore/vendored/requests/packages/chardet/sbcsgroupprober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/sbcsgroupprober.py rename to botocore/vendored/requests/packages/chardet/sbcsgroupprober.py index b224814568..1b6196cd16 100644 --- a/botocore/vendored/requests/packages/charade/sbcsgroupprober.py +++ b/botocore/vendored/requests/packages/chardet/sbcsgroupprober.py @@ -1,69 +1,69 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetgroupprober import CharSetGroupProber -from .sbcharsetprober import SingleByteCharSetProber -from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, - Latin5CyrillicModel, MacCyrillicModel, - Ibm866Model, Ibm855Model) -from .langgreekmodel import Latin7GreekModel, Win1253GreekModel -from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel -from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel -from .langthaimodel import TIS620ThaiModel -from .langhebrewmodel import Win1255HebrewModel -from .hebrewprober import HebrewProber - - -class SBCSGroupProber(CharSetGroupProber): - def __init__(self): - CharSetGroupProber.__init__(self) - self._mProbers = [ - SingleByteCharSetProber(Win1251CyrillicModel), - SingleByteCharSetProber(Koi8rModel), - SingleByteCharSetProber(Latin5CyrillicModel), - SingleByteCharSetProber(MacCyrillicModel), - SingleByteCharSetProber(Ibm866Model), - SingleByteCharSetProber(Ibm855Model), - SingleByteCharSetProber(Latin7GreekModel), - SingleByteCharSetProber(Win1253GreekModel), - SingleByteCharSetProber(Latin5BulgarianModel), - SingleByteCharSetProber(Win1251BulgarianModel), - SingleByteCharSetProber(Latin2HungarianModel), - SingleByteCharSetProber(Win1250HungarianModel), - SingleByteCharSetProber(TIS620ThaiModel), - ] - hebrewProber = HebrewProber() - logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, - False, hebrewProber) - visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, - hebrewProber) - hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) - self._mProbers.extend([hebrewProber, logicalHebrewProber, - visualHebrewProber]) - - self.reset() +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .sbcharsetprober import SingleByteCharSetProber +from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, + Latin5CyrillicModel, MacCyrillicModel, + Ibm866Model, Ibm855Model) +from .langgreekmodel import Latin7GreekModel, Win1253GreekModel +from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from .langthaimodel import TIS620ThaiModel +from .langhebrewmodel import Win1255HebrewModel +from .hebrewprober import HebrewProber + + +class SBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ + SingleByteCharSetProber(Win1251CyrillicModel), + SingleByteCharSetProber(Koi8rModel), + SingleByteCharSetProber(Latin5CyrillicModel), + SingleByteCharSetProber(MacCyrillicModel), + SingleByteCharSetProber(Ibm866Model), + SingleByteCharSetProber(Ibm855Model), + SingleByteCharSetProber(Latin7GreekModel), + SingleByteCharSetProber(Win1253GreekModel), + SingleByteCharSetProber(Latin5BulgarianModel), + SingleByteCharSetProber(Win1251BulgarianModel), + SingleByteCharSetProber(Latin2HungarianModel), + SingleByteCharSetProber(Win1250HungarianModel), + SingleByteCharSetProber(TIS620ThaiModel), + ] + hebrewProber = HebrewProber() + logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, + False, hebrewProber) + visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, + hebrewProber) + hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) + self._mProbers.extend([hebrewProber, logicalHebrewProber, + visualHebrewProber]) + + self.reset() diff --git a/botocore/vendored/requests/packages/charade/sjisprober.py b/botocore/vendored/requests/packages/chardet/sjisprober.py similarity index 96% rename from botocore/vendored/requests/packages/charade/sjisprober.py rename to botocore/vendored/requests/packages/chardet/sjisprober.py index 9bb0cdcf1f..cd0e9e7078 100644 --- a/botocore/vendored/requests/packages/charade/sjisprober.py +++ b/botocore/vendored/requests/packages/chardet/sjisprober.py @@ -1,91 +1,91 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import SJISDistributionAnalysis -from .jpcntx import SJISContextAnalysis -from .mbcssm import SJISSMModel -from . import constants - - -class SJISProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(SJISSMModel) - self._mDistributionAnalyzer = SJISDistributionAnalysis() - self._mContextAnalyzer = SJISContextAnalysis() - self.reset() - - def reset(self): - MultiByteCharSetProber.reset(self) - self._mContextAnalyzer.reset() - - def get_charset_name(self): - return "SHIFT_JIS" - - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() - if i == 0: - self._mLastChar[1] = aBuf[0] - self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], - charLen) - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) - else: - self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 - - charLen], charLen) - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) - - self._mLastChar[0] = aBuf[aLen - 1] - - if self.get_state() == constants.eDetecting: - if (self._mContextAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - contxtCf = self._mContextAnalyzer.get_confidence() - distribCf = self._mDistributionAnalyzer.get_confidence() - return max(contxtCf, distribCf) +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import SJISDistributionAnalysis +from .jpcntx import SJISContextAnalysis +from .mbcssm import SJISSMModel +from . import constants + + +class SJISProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(SJISSMModel) + self._mDistributionAnalyzer = SJISDistributionAnalysis() + self._mContextAnalyzer = SJISContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return self._mContextAnalyzer.get_charset_name() + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == constants.eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + + ' prober hit error at byte ' + str(i) + + '\n') + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], + charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 + - charLen], charLen) + self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], + charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if (self._mContextAnalyzer.got_enough_data() and + (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/botocore/vendored/requests/packages/charade/universaldetector.py b/botocore/vendored/requests/packages/chardet/universaldetector.py similarity index 88% rename from botocore/vendored/requests/packages/charade/universaldetector.py rename to botocore/vendored/requests/packages/chardet/universaldetector.py index 6a8e68a8a7..476522b999 100644 --- a/botocore/vendored/requests/packages/charade/universaldetector.py +++ b/botocore/vendored/requests/packages/chardet/universaldetector.py @@ -1,168 +1,170 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -import sys -import codecs -from .latin1prober import Latin1Prober # windows-1252 -from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets -from .sbcsgroupprober import SBCSGroupProber # single-byte character sets -from .escprober import EscCharSetProber # ISO-2122, etc. -import re - -MINIMUM_THRESHOLD = 0.20 -ePureAscii = 0 -eEscAscii = 1 -eHighbyte = 2 - - -class UniversalDetector: - def __init__(self): - self._highBitDetector = re.compile(b'[\x80-\xFF]') - self._escDetector = re.compile(b'(\033|~{)') - self._mEscCharSetProber = None - self._mCharSetProbers = [] - self.reset() - - def reset(self): - self.result = {'encoding': None, 'confidence': 0.0} - self.done = False - self._mStart = True - self._mGotData = False - self._mInputState = ePureAscii - self._mLastChar = b'' - if self._mEscCharSetProber: - self._mEscCharSetProber.reset() - for prober in self._mCharSetProbers: - prober.reset() - - def feed(self, aBuf): - if self.done: - return - - aLen = len(aBuf) - if not aLen: - return - - if not self._mGotData: - # If the data starts with BOM, we know it is UTF - if aBuf[:3] == codecs.BOM: - # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8", 'confidence': 1.0} - elif aBuf[:4] in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): - # FF FE 00 00 UTF-32, little-endian BOM - # 00 00 FE FF UTF-32, big-endian BOM - self.result = {'encoding': "UTF-32", 'confidence': 1.0} - elif aBuf[:4] == b'\xFE\xFF\x00\x00': - # FE FF 00 00 UCS-4, unusual octet order BOM (3412) - self.result = { - 'encoding': "X-ISO-10646-UCS-4-3412", - 'confidence': 1.0 - } - elif aBuf[:4] == b'\x00\x00\xFF\xFE': - # 00 00 FF FE UCS-4, unusual octet order BOM (2143) - self.result = { - 'encoding': "X-ISO-10646-UCS-4-2143", - 'confidence': 1.0 - } - elif aBuf[:2] == codecs.BOM_LE or aBuf[:2] == codecs.BOM_BE: - # FF FE UTF-16, little endian BOM - # FE FF UTF-16, big endian BOM - self.result = {'encoding': "UTF-16", 'confidence': 1.0} - - self._mGotData = True - if self.result['encoding'] and (self.result['confidence'] > 0.0): - self.done = True - return - - if self._mInputState == ePureAscii: - if self._highBitDetector.search(aBuf): - self._mInputState = eHighbyte - elif ((self._mInputState == ePureAscii) and - self._escDetector.search(self._mLastChar + aBuf)): - self._mInputState = eEscAscii - - self._mLastChar = aBuf[-1:] - - if self._mInputState == eEscAscii: - if not self._mEscCharSetProber: - self._mEscCharSetProber = EscCharSetProber() - if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: - self.result = { - 'encoding': self._mEscCharSetProber.get_charset_name(), - 'confidence': self._mEscCharSetProber.get_confidence() - } - self.done = True - elif self._mInputState == eHighbyte: - if not self._mCharSetProbers: - self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), - Latin1Prober()] - for prober in self._mCharSetProbers: - if prober.feed(aBuf) == constants.eFoundIt: - self.result = {'encoding': prober.get_charset_name(), - 'confidence': prober.get_confidence()} - self.done = True - break - - def close(self): - if self.done: - return - if not self._mGotData: - if constants._debug: - sys.stderr.write('no data received!\n') - return - self.done = True - - if self._mInputState == ePureAscii: - self.result = {'encoding': 'ascii', 'confidence': 1.0} - return self.result - - if self._mInputState == eHighbyte: - proberConfidence = None - maxProberConfidence = 0.0 - maxProber = None - for prober in self._mCharSetProbers: - if not prober: - continue - proberConfidence = prober.get_confidence() - if proberConfidence > maxProberConfidence: - maxProberConfidence = proberConfidence - maxProber = prober - if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): - self.result = {'encoding': maxProber.get_charset_name(), - 'confidence': maxProber.get_confidence()} - return self.result - - if constants._debug: - sys.stderr.write('no probers hit minimum threshhold\n') - for prober in self._mCharSetProbers[0].mProbers: - if not prober: - continue - sys.stderr.write('%s confidence = %s\n' % - (prober.get_charset_name(), - prober.get_confidence())) +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +import sys +import codecs +from .latin1prober import Latin1Prober # windows-1252 +from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets +from .sbcsgroupprober import SBCSGroupProber # single-byte character sets +from .escprober import EscCharSetProber # ISO-2122, etc. +import re + +MINIMUM_THRESHOLD = 0.20 +ePureAscii = 0 +eEscAscii = 1 +eHighbyte = 2 + + +class UniversalDetector: + def __init__(self): + self._highBitDetector = re.compile(b'[\x80-\xFF]') + self._escDetector = re.compile(b'(\033|~{)') + self._mEscCharSetProber = None + self._mCharSetProbers = [] + self.reset() + + def reset(self): + self.result = {'encoding': None, 'confidence': 0.0} + self.done = False + self._mStart = True + self._mGotData = False + self._mInputState = ePureAscii + self._mLastChar = b'' + if self._mEscCharSetProber: + self._mEscCharSetProber.reset() + for prober in self._mCharSetProbers: + prober.reset() + + def feed(self, aBuf): + if self.done: + return + + aLen = len(aBuf) + if not aLen: + return + + if not self._mGotData: + # If the data starts with BOM, we know it is UTF + if aBuf[:3] == codecs.BOM_UTF8: + # EF BB BF UTF-8 with BOM + self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0} + elif aBuf[:4] == codecs.BOM_UTF32_LE: + # FF FE 00 00 UTF-32, little-endian BOM + self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} + elif aBuf[:4] == codecs.BOM_UTF32_BE: + # 00 00 FE FF UTF-32, big-endian BOM + self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} + elif aBuf[:4] == b'\xFE\xFF\x00\x00': + # FE FF 00 00 UCS-4, unusual octet order BOM (3412) + self.result = { + 'encoding': "X-ISO-10646-UCS-4-3412", + 'confidence': 1.0 + } + elif aBuf[:4] == b'\x00\x00\xFF\xFE': + # 00 00 FF FE UCS-4, unusual octet order BOM (2143) + self.result = { + 'encoding': "X-ISO-10646-UCS-4-2143", + 'confidence': 1.0 + } + elif aBuf[:2] == codecs.BOM_LE: + # FF FE UTF-16, little endian BOM + self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} + elif aBuf[:2] == codecs.BOM_BE: + # FE FF UTF-16, big endian BOM + self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} + + self._mGotData = True + if self.result['encoding'] and (self.result['confidence'] > 0.0): + self.done = True + return + + if self._mInputState == ePureAscii: + if self._highBitDetector.search(aBuf): + self._mInputState = eHighbyte + elif ((self._mInputState == ePureAscii) and + self._escDetector.search(self._mLastChar + aBuf)): + self._mInputState = eEscAscii + + self._mLastChar = aBuf[-1:] + + if self._mInputState == eEscAscii: + if not self._mEscCharSetProber: + self._mEscCharSetProber = EscCharSetProber() + if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), + 'confidence': self._mEscCharSetProber.get_confidence()} + self.done = True + elif self._mInputState == eHighbyte: + if not self._mCharSetProbers: + self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), + Latin1Prober()] + for prober in self._mCharSetProbers: + if prober.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': prober.get_charset_name(), + 'confidence': prober.get_confidence()} + self.done = True + break + + def close(self): + if self.done: + return + if not self._mGotData: + if constants._debug: + sys.stderr.write('no data received!\n') + return + self.done = True + + if self._mInputState == ePureAscii: + self.result = {'encoding': 'ascii', 'confidence': 1.0} + return self.result + + if self._mInputState == eHighbyte: + proberConfidence = None + maxProberConfidence = 0.0 + maxProber = None + for prober in self._mCharSetProbers: + if not prober: + continue + proberConfidence = prober.get_confidence() + if proberConfidence > maxProberConfidence: + maxProberConfidence = proberConfidence + maxProber = prober + if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): + self.result = {'encoding': maxProber.get_charset_name(), + 'confidence': maxProber.get_confidence()} + return self.result + + if constants._debug: + sys.stderr.write('no probers hit minimum threshhold\n') + for prober in self._mCharSetProbers[0].mProbers: + if not prober: + continue + sys.stderr.write('%s confidence = %s\n' % + (prober.get_charset_name(), + prober.get_confidence())) diff --git a/botocore/vendored/requests/packages/charade/utf8prober.py b/botocore/vendored/requests/packages/chardet/utf8prober.py similarity index 97% rename from botocore/vendored/requests/packages/charade/utf8prober.py rename to botocore/vendored/requests/packages/chardet/utf8prober.py index 72c8d3d6a9..1c0bb5d8fd 100644 --- a/botocore/vendored/requests/packages/charade/utf8prober.py +++ b/botocore/vendored/requests/packages/chardet/utf8prober.py @@ -1,76 +1,76 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -from .charsetprober import CharSetProber -from .codingstatemachine import CodingStateMachine -from .mbcssm import UTF8SMModel - -ONE_CHAR_PROB = 0.5 - - -class UTF8Prober(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(UTF8SMModel) - self.reset() - - def reset(self): - CharSetProber.reset(self) - self._mCodingSM.reset() - self._mNumOfMBChar = 0 - - def get_charset_name(self): - return "utf-8" - - def feed(self, aBuf): - for c in aBuf: - codingState = self._mCodingSM.next_state(c) - if codingState == constants.eError: - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - if self._mCodingSM.get_current_charlen() >= 2: - self._mNumOfMBChar += 1 - - if self.get_state() == constants.eDetecting: - if self.get_confidence() > constants.SHORTCUT_THRESHOLD: - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - unlike = 0.99 - if self._mNumOfMBChar < 6: - for i in range(0, self._mNumOfMBChar): - unlike = unlike * ONE_CHAR_PROB - return 1.0 - unlike - else: - return unlike +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .mbcssm import UTF8SMModel + +ONE_CHAR_PROB = 0.5 + + +class UTF8Prober(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(UTF8SMModel) + self.reset() + + def reset(self): + CharSetProber.reset(self) + self._mCodingSM.reset() + self._mNumOfMBChar = 0 + + def get_charset_name(self): + return "utf-8" + + def feed(self, aBuf): + for c in aBuf: + codingState = self._mCodingSM.next_state(c) + if codingState == constants.eError: + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + if self._mCodingSM.get_current_charlen() >= 2: + self._mNumOfMBChar += 1 + + if self.get_state() == constants.eDetecting: + if self.get_confidence() > constants.SHORTCUT_THRESHOLD: + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + unlike = 0.99 + if self._mNumOfMBChar < 6: + for i in range(0, self._mNumOfMBChar): + unlike = unlike * ONE_CHAR_PROB + return 1.0 - unlike + else: + return unlike diff --git a/botocore/vendored/requests/packages/urllib3/__init__.py b/botocore/vendored/requests/packages/urllib3/__init__.py index 73071f7001..dfc82d0336 100644 --- a/botocore/vendored/requests/packages/urllib3/__init__.py +++ b/botocore/vendored/requests/packages/urllib3/__init__.py @@ -1,9 +1,3 @@ -# urllib3/__init__.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ urllib3 - Thread-safe connection pooling and re-using. """ @@ -23,7 +17,10 @@ from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host, Timeout +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry # Set default logging handler to avoid "No handler found" warnings. @@ -51,8 +48,19 @@ def add_stderr_logger(level=logging.DEBUG): handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) logger.addHandler(handler) logger.setLevel(level) - logger.debug('Added an stderr logging handler to logger: %s' % __name__) + logger.debug('Added a stderr logging handler to logger: %s' % __name__) return handler # ... Clean up. del NullHandler + + +# Set security warning to only go off once by default. +import warnings +warnings.simplefilter('always', exceptions.SecurityWarning) + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. + """ + warnings.simplefilter('ignore', category) diff --git a/botocore/vendored/requests/packages/urllib3/_collections.py b/botocore/vendored/requests/packages/urllib3/_collections.py index 282b8d5e05..784342a4eb 100644 --- a/botocore/vendored/requests/packages/urllib3/_collections.py +++ b/botocore/vendored/requests/packages/urllib3/_collections.py @@ -1,19 +1,23 @@ -# urllib3/_collections.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php +from collections import Mapping, MutableMapping +try: + from threading import RLock +except ImportError: # Platform-specific: No threads available + class RLock: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + pass -from collections import MutableMapping -from threading import RLock try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict +from .packages.six import iterkeys, itervalues -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] _Null = object() @@ -81,8 +85,7 @@ def __iter__(self): def clear(self): with self.lock: # Copy pointers to all values, then wipe the mapping - # under Python 2, this copies the list of values twice :-| - values = list(self._container.values()) + values = list(itervalues(self._container)) self._container.clear() if self.dispose_func: @@ -91,4 +94,105 @@ def clear(self): def keys(self): with self.lock: - return self._container.keys() + return list(iterkeys(self._container)) + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 7230. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + + If you want to access the raw headers with their original casing + for debugging purposes you can access the private ``._data`` attribute + which is a normal python ``dict`` that maps the case-insensitive key to a + list of tuples stored as (case-sensitive-original-name, value). Using the + structure from above as our example: + + >>> headers._data + {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], + 'content-length': [('content-length', '7')]} + """ + + def __init__(self, headers=None, **kwargs): + self._data = {} + if headers is None: + headers = {} + self.update(headers, **kwargs) + + def add(self, key, value): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + self._data.setdefault(key.lower(), []).append((key, value)) + + def getlist(self, key): + """Returns a list of all the values for the named field. Returns an + empty list if the key doesn't exist.""" + return self[key].split(', ') if key in self else [] + + def copy(self): + h = HTTPHeaderDict() + for key in self._data: + for rawkey, value in self._data[key]: + h.add(rawkey, value) + return h + + def __eq__(self, other): + if not isinstance(other, Mapping): + return False + other = HTTPHeaderDict(other) + return dict((k1, self[k1]) for k1 in self._data) == \ + dict((k2, other[k2]) for k2 in other._data) + + def __getitem__(self, key): + values = self._data[key.lower()] + return ', '.join(value[1] for value in values) + + def __setitem__(self, key, value): + self._data[key.lower()] = [(key, value)] + + def __delitem__(self, key): + del self._data[key.lower()] + + def __len__(self): + return len(self._data) + + def __iter__(self): + for headers in itervalues(self._data): + yield headers[0][0] + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/botocore/vendored/requests/packages/urllib3/connection.py b/botocore/vendored/requests/packages/urllib3/connection.py index e240786a67..e5de769d8c 100644 --- a/botocore/vendored/requests/packages/urllib3/connection.py +++ b/botocore/vendored/requests/packages/urllib3/connection.py @@ -1,50 +1,183 @@ -# urllib3/connection.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - +import datetime +import sys import socket from socket import timeout as SocketTimeout +import warnings +from .packages import six -try: # Python 3 - from http.client import HTTPConnection, HTTPException +try: # Python 3 + from http.client import HTTPConnection as _HTTPConnection, HTTPException except ImportError: - from httplib import HTTPConnection, HTTPException + from httplib import HTTPConnection as _HTTPConnection, HTTPException + class DummyConnection(object): "Used to detect a failed ConnectionCls import." pass -try: # Compiled with SSL? - ssl = None + +try: # Compiled with SSL? HTTPSConnection = DummyConnection + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None class BaseSSLError(BaseException): pass - try: # Python 3 - from http.client import HTTPSConnection - except ImportError: - from httplib import HTTPSConnection - import ssl - BaseSSLError = ssl.SSLError +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: # Python 2: + class ConnectionError(Exception): + pass -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass from .exceptions import ( ConnectTimeoutError, + SystemTimeWarning, + SecurityWarning, ) from .packages.ssl_match_hostname import match_hostname -from .util import ( - assert_fingerprint, + +from .util.ssl_ import ( resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, + assert_fingerprint, ) + +from .util import connection + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + +RECENT_DATE = datetime.date(2014, 1, 1) + + +class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). + """ + + default_port = port_by_scheme['http'] + + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + #: Whether this connection verifies the host's certificate. + is_verified = False + + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') + + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + + # Superclass also sets self.source_address in Python 2.7+. + _HTTPConnection.__init__(self, *args, **kw) + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it. + + :return: New socket connection. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + conn = connection.create_connection( + (self.host, self.port), self.timeout, **extra_kw) + + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + return conn + + def _prepare_conn(self, conn): + self.sock = conn + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): + # TODO: Fix tunnel so it doesn't depend on self.sock state. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): + + HTTPConnection.__init__(self, host, port, strict=strict, + timeout=timeout, **kw) + + self.key_file = key_file + self.cert_file = cert_file + + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = 'https' + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) + + class VerifiedHTTPSConnection(HTTPSConnection): """ Based on httplib.HTTPSConnection but wraps the socket with @@ -53,6 +186,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None + assert_fingerprint = None def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -67,41 +201,62 @@ def set_cert(self, key_file=None, cert_file=None, def connect(self): # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout, - ) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) + conn = self._new_conn() resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) - if self._tunnel_host: - self.sock = sock + hostname = self.host + if getattr(self, '_tunnel_host', None): + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + + self.sock = conn # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host + + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn(( + 'System time is way off (before {0}). This will probably ' + 'lead to SSL verification errors').format(RECENT_DATE), + SystemTimeWarning + ) # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, - server_hostname=self.host, + server_hostname=hostname, ssl_version=resolved_ssl_version) - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif resolved_cert_reqs != ssl.CERT_NONE \ + and self.assert_hostname is not False: + cert = self.sock.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn(( + 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. ' + 'This feature is being removed by major browsers and deprecated by RFC 2818. ' + '(See https://github.com/shazow/urllib3/issues/497 for details.)'), + SecurityWarning + ) + match_hostname(cert, self.assert_hostname or hostname) + + self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED + or self.assert_fingerprint is not None) if ssl: + # Make a copy for testing. + UnverifiedHTTPSConnection = HTTPSConnection HTTPSConnection = VerifiedHTTPSConnection diff --git a/botocore/vendored/requests/packages/urllib3/connectionpool.py b/botocore/vendored/requests/packages/urllib3/connectionpool.py index 1e5814357a..70ee4eed5e 100644 --- a/botocore/vendored/requests/packages/urllib3/connectionpool.py +++ b/botocore/vendored/requests/packages/urllib3/connectionpool.py @@ -1,16 +1,12 @@ -# urllib3/connectionpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import errno import logging +import sys +import warnings from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full @@ -19,30 +15,32 @@ from .exceptions import ( ClosedPoolError, - ConnectTimeoutError, + ProtocolError, EmptyPoolError, HostChangedError, + LocationValueError, MaxRetryError, + ProxyError, + ReadTimeoutError, SSLError, TimeoutError, - ReadTimeoutError, - ProxyError, + InsecureRequestWarning, ) from .packages.ssl_match_hostname import CertificateError from .packages import six from .connection import ( + port_by_scheme, DummyConnection, HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, - HTTPException, BaseSSLError, + HTTPException, BaseSSLError, ConnectionError ) from .request import RequestMethods from .response import HTTPResponse -from .util import ( - assert_fingerprint, - get_host, - is_connection_dropped, - Timeout, -) + +from .util.connection import is_connection_dropped +from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import get_host xrange = six.moves.xrange @@ -51,14 +49,8 @@ _Default = object() -port_by_scheme = { - 'http': 80, - 'https': 443, -} - ## Pool objects - class ConnectionPool(object): """ Base class for all connection pools, such as @@ -69,10 +61,11 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): - # httplib doesn't like it when we include brackets in ipv6 addresses - host = host.strip('[]') + if not host: + raise LocationValueError("No host specified.") - self.host = host + # httplib doesn't like it when we include brackets in ipv6 addresses + self.host = host.strip('[]') self.port = port def __str__(self): @@ -82,6 +75,7 @@ def __str__(self): # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Thread-safe connection pool for one host. @@ -126,6 +120,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Headers to include with all requests, unless other headers are given explicitly. + :param retries: + Retry configuration to use by default with requests in this pool. + :param _proxy: Parsed proxy URL, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" @@ -133,6 +130,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy_headers: A dictionary with proxy headers, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. """ scheme = 'http' @@ -140,18 +141,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None): + headers=None, retries=None, + _proxy=None, _proxy_headers=None, + **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict - # This is for backwards compatibility and can be removed once a timeout - # can only be set to a Timeout object if not isinstance(timeout, Timeout): timeout = Timeout.from_float(timeout) + if retries is None: + retries = Retry.DEFAULT + self.timeout = timeout + self.retries = retries self.pool = self.QueueCls(maxsize) self.block = block @@ -166,22 +171,26 @@ def __init__(self, host, port=None, strict=False, # These are mostly for testing and debugging purposes. self.num_connections = 0 self.num_requests = 0 + self.conn_kw = conn_kw + + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPConnection`. + Return a fresh :class:`HTTPConnection`. """ self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - - return self.ConnectionCls(host=self.host, port=self.port, + conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, - **extra_params) + strict=self.strict, **self.conn_kw) + return conn def _get_conn(self, timeout=None): """ @@ -199,7 +208,7 @@ def _get_conn(self, timeout=None): try: conn = self.pool.get(block=self.block, timeout=timeout) - except AttributeError: # self.pool is None + except AttributeError: # self.pool is None raise ClosedPoolError(self, "Pool is closed.") except Empty: @@ -213,6 +222,11 @@ def _get_conn(self, timeout=None): if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None return conn or self._new_conn() @@ -232,19 +246,26 @@ def _put_conn(self, conn): """ try: self.pool.put(conn, block=False) - return # Everything is dandy, done. + return # Everything is dandy, done. except AttributeError: # self.pool is None. pass except Full: # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + log.warning( + "Connection pool is full, discarding connection: %s" % + self.host) # Connection never got put back into the pool, close it. if conn: conn.close() + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -257,10 +278,27 @@ def _get_timeout(self, timeout): # can be removed later return Timeout.from_float(timeout) + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ - Perform a request on a given httplib connection object taken from our + Perform a request on a given urllib connection object taken from our pool. :param conn: @@ -276,23 +314,26 @@ def _make_request(self, conn, method, url, timeout=_Default, self.num_requests += 1 timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # Trigger any extra validation we need to do. try: - timeout_obj.start_connect() - conn.timeout = timeout_obj.connect_timeout - # conn.request() calls httplib.*.request, not the method in - # urllib3.request. It also calls makefile (recv) on the socket. - conn.request(method, url, **httplib_request_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, timeout_obj.connect_timeout)) + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise + + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr - if hasattr(conn, 'sock'): + if getattr(conn, 'sock', None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -300,41 +341,20 @@ def _make_request(self, conn, method, url, timeout=_Default, # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) - else: # None or a value + else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7+, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older + except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() - except SocketTimeout: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - except BaseSSLError as e: - # Catch possible read timeouts thrown as SSL errors. If not the - # case, rethrow the original. We need to do this because of: - # http://bugs.python.org/issue10272 - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") - - raise - - except SocketError as e: # Platform-specific: Python 2 - # See the above comment about EAGAIN in Python 3. In Python 2 we - # have to specifically catch it and throw the timeout error - if e.errno in _blocking_errnos: - raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) - + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. @@ -358,7 +378,7 @@ def close(self): conn.close() except Empty: - pass # Done. + pass # Done. def is_same_host(self, url): """ @@ -371,13 +391,15 @@ def is_same_host(self, url): # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) + # Use explicit default port for comparison when none is given if self.port and not port: - # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None return (scheme, host, port) == (self.scheme, self.host, self.port) - def urlopen(self, method, url, body=None, headers=None, retries=3, + def urlopen(self, method, url, body=None, headers=None, retries=None, redirect=True, assert_same_host=True, timeout=_Default, pool_timeout=None, release_conn=None, **response_kw): """ @@ -411,11 +433,25 @@ def urlopen(self, method, url, body=None, headers=None, retries=3, these headers completely replace any pool-specific headers. :param retries: - Number of retries to allow before raising a MaxRetryError exception. + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307, 308). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -449,23 +485,34 @@ def urlopen(self, method, url, body=None, headers=None, retries=3, if headers is None: headers = self.headers - if retries < 0: - raise MaxRetryError(self, url) + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - raise HostChangedError(self, url, retries - 1) + raise HostChangedError(self, url, retries) conn = None + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + try: - # Request a connection from the queue + # Request a connection from the queue. conn = self._get_conn(timeout=pool_timeout) - # Make the request on the httplib connection object + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, timeout=timeout, body=body, headers=headers) @@ -488,38 +535,38 @@ def urlopen(self, method, url, body=None, headers=None, retries=3, # ``response.read()``) except Empty: - # Timed out by queue + # Timed out by queue. raise EmptyPoolError(self, "No pool connections are available.") - except BaseSSLError as e: + except (BaseSSLError, CertificateError) as e: + # Close the connection. If a connection is reused on which there + # was a Certificate error, the next request will certainly raise + # another Certificate error. + if conn: + conn.close() + conn = None raise SSLError(e) - except CertificateError as e: - # Name mismatch - raise SSLError(e) + except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: + if conn: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn.close() + conn = None - except TimeoutError as e: - # Connection broken, discard. - conn = None - # Save the error off for retry logic. - err = e - - if retries == 0: - raise + stacktrace = sys.exc_info()[2] + if isinstance(e, SocketError) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError('Connection aborted.', e) - except (HTTPException, SocketError) as e: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) + retries = retries.increment(method, url, error=e, + _pool=self, _stacktrace=stacktrace) + retries.sleep() - # Connection broken, discard. It will be replaced next _get_conn(). - conn = None - # This is necessary so we can access e below + # Keep track of the error for the retry warning. err = e - if retries == 0: - raise MaxRetryError(self, url, e) - finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -529,9 +576,9 @@ def urlopen(self, method, url, body=None, headers=None, retries=3, if not conn: # Try again - log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, err, url)) - return self.urlopen(method, url, body, headers, retries - 1, + log.warning("Retrying (%r) after connection " + "broken by '%r': %s" % (retries, err, url)) + return self.urlopen(method, url, body, headers, retries, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, release_conn=release_conn, **response_kw) @@ -541,11 +588,31 @@ def urlopen(self, method, url, body=None, headers=None, retries=3, if redirect_location: if response.status == 303: method = 'GET' + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) + + # Check if we should retry the HTTP response. + if retries.is_forced_retry(method, status_code=response.status): + retries = retries.increment(method, url, response=response, _pool=self) + retries.sleep() + log.info("Forced retry: %s" % url) + return self.urlopen(method, url, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response @@ -556,7 +623,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`httplib.HTTPSConnection`. + instead of :class:`.HTTPSConnection`. :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. @@ -572,15 +639,17 @@ class HTTPSConnectionPool(HTTPConnectionPool): ConnectionCls = HTTPSConnection def __init__(self, host, port=None, - strict=False, timeout=None, maxsize=1, - block=False, headers=None, + strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, + block=False, headers=None, retries=None, _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, - assert_hostname=None, assert_fingerprint=None): + assert_hostname=None, assert_fingerprint=None, + **conn_kw): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers) + block, headers, retries, _proxy, _proxy_headers, + **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -610,7 +679,12 @@ def _prepare_conn(self, conn): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - set_tunnel(self.host, self.port, self.proxy_headers) + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. conn.connect() @@ -636,16 +710,29 @@ def _new_conn(self): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, - **extra_params) + strict=self.strict, **self.conn_kw) return self._prepare_conn(conn) + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not getattr(conn, 'sock', None): # AppEngine might not have `.sock` + conn.connect() + + if not conn.is_verified: + warnings.warn(( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.org/en/latest/security.html'), + InsecureRequestWarning) + def connection_from_url(url, **kw): """ @@ -662,7 +749,7 @@ def connection_from_url(url, **kw): :class:`.ConnectionPool`. Useful for specifying things like timeout, maxsize, headers, etc. - Example: :: + Example:: >>> conn = connection_from_url('http://google.com/') >>> r = conn.request('GET', '/') diff --git a/botocore/vendored/requests/packages/urllib3/contrib/ntlmpool.py b/botocore/vendored/requests/packages/urllib3/contrib/ntlmpool.py index b8cd933034..c6b266f5d1 100644 --- a/botocore/vendored/requests/packages/urllib3/contrib/ntlmpool.py +++ b/botocore/vendored/requests/packages/urllib3/contrib/ntlmpool.py @@ -1,9 +1,3 @@ -# urllib3/contrib/ntlmpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ NTLM authenticating pool, contributed by erikcederstran diff --git a/botocore/vendored/requests/packages/urllib3/contrib/pyopenssl.py b/botocore/vendored/requests/packages/urllib3/contrib/pyopenssl.py index 91bc2fa4f7..8229090cb6 100644 --- a/botocore/vendored/requests/packages/urllib3/contrib/pyopenssl.py +++ b/botocore/vendored/requests/packages/urllib3/contrib/pyopenssl.py @@ -1,4 +1,7 @@ -'''SSL with SNI-support for Python 2. +'''SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. This needs the following packages installed: @@ -6,9 +9,15 @@ * ndg-httpsclient (tested with 0.3.2) * pyasn1 (tested with 0.1.6) -To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. -This can be done in a ``sitecustomize`` module, or at any other time before -your application begins using ``urllib3``, like this:: +You can install them with the following command: + + pip install pyopenssl ndg-httpsclient pyasn1 + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: try: import urllib3.contrib.pyopenssl @@ -18,18 +27,39 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +compression in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +Module Variables +---------------- + +:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. + Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: + ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) + ''' -from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName +try: + from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT + from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName +except SyntaxError as e: + raise ImportError(e) + import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder -from socket import _fileobject +from pyasn1.type import univ, constraint +from socket import _fileobject, timeout import ssl import select -from cStringIO import StringIO -from .. import connectionpool +from .. import connection from .. import util __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] @@ -40,9 +70,14 @@ # Map from urllib3 to PyOpenSSL compatible parameter-values. _openssl_versions = { ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, } + +try: + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) +except AttributeError: + pass + _openssl_verify = { ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, @@ -50,25 +85,53 @@ + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ + "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ + "!aNULL:!MD5:!DSS" + orig_util_HAS_SNI = util.HAS_SNI -orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket +orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket def inject_into_urllib3(): 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' - connectionpool.ssl_wrap_socket = ssl_wrap_socket + connection.ssl_wrap_socket = ssl_wrap_socket util.HAS_SNI = HAS_SNI def extract_from_urllib3(): 'Undo monkey-patching by :func:`inject_into_urllib3`.' - connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket util.HAS_SNI = orig_util_HAS_SNI +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +class SubjectAltName(BaseSubjectAltName): + '''ASN.1 implementation for subjectAltNames support''' + + # There is no limit to how many SAN certificates a certificate may have, + # however this needs to have some limit so we'll set an arbitrarily high + # limit. + sizeSpec = univ.SequenceOf.sizeSpec + \ + constraint.ValueSizeConstraint(1, 1024) + + ### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. def get_subj_alt_name(peer_cert): # Search through extensions @@ -100,193 +163,68 @@ def get_subj_alt_name(peer_cert): return dns_name -class fileobject(_fileobject): - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. - # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(rbufsize) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - try: - data = self._sock.recv(left) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self._sock.recv - while True: - try: - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - except OpenSSL.SSL.WantReadError: - continue - break - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - continue - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - - class WrappedSocket(object): - '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + '''API-compatibility wrapper for Python OpenSSL's Connection-class. - def __init__(self, connection, socket): + Note: _makefile_refs, _drop() and _reuse() are needed for the garbage + collector of pypy. + ''' + + def __init__(self, connection, socket, suppress_ragged_eofs=True): self.connection = connection self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs + self._makefile_refs = 0 def fileno(self): return self.socket.fileno() def makefile(self, mode, bufsize=-1): - return fileobject(self.connection, mode, bufsize) + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + else: + raise + except OpenSSL.SSL.WantReadError: + rd, wd, ed = select.select( + [self.socket], [], [], self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + else: + return self.recv(*args, **kwargs) + else: + return data def settimeout(self, timeout): return self.socket.settimeout(timeout) + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + _, wlist, _ = select.select([], [self.socket], [], + self.socket.gettimeout()) + if not wlist: + raise timeout() + continue + def sendall(self, data): - return self.connection.sendall(data) + while len(data): + sent = self._send_until_done(data) + data = data[sent:] def close(self): - return self.connection.shutdown() + if self._makefile_refs < 1: + return self.connection.shutdown() + else: + self._makefile_refs -= 1 def getpeercert(self, binary_form=False): x509 = self.connection.get_peer_certificate() @@ -309,6 +247,15 @@ def getpeercert(self, binary_form=False): ] } + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + def _verify_callback(cnx, x509, err_no, err_depth, return_code): return err_no == 0 @@ -319,6 +266,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ssl_version=None): ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) if certfile: + keyfile = keyfile or certfile # Match behaviour of the normal python ssl library ctx.use_certificate_file(certfile) if keyfile: ctx.use_privatekey_file(keyfile) @@ -329,6 +277,15 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ctx.load_verify_locations(ca_certs, None) except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + else: + ctx.set_default_verify_paths() + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + ctx.set_options(OP_NO_COMPRESSION) + + # Set list of supported ciphersuites. + ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) diff --git a/botocore/vendored/requests/packages/urllib3/exceptions.py b/botocore/vendored/requests/packages/urllib3/exceptions.py index 98ef9abc7f..0c6fd3c51b 100644 --- a/botocore/vendored/requests/packages/urllib3/exceptions.py +++ b/botocore/vendored/requests/packages/urllib3/exceptions.py @@ -1,9 +1,3 @@ -# urllib3/exceptions.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - ## Base Exceptions @@ -11,6 +5,11 @@ class HTTPError(Exception): "Base exception used by this module." pass +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + class PoolError(HTTPError): "Base exception for errors caused within a pool." @@ -49,19 +48,32 @@ class DecodeError(HTTPError): pass +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." + pass + + +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + ## Leaf Exceptions class MaxRetryError(RequestError): - "Raised when the maximum number of retries is exceeded." + """Raised when the maximum number of retries is exceeded. + + :param pool: The connection pool + :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` + :param string url: The requested Url + :param exceptions.Exception reason: The underlying error + + """ def __init__(self, pool, url, reason=None): self.reason = reason - message = "Max retries exceeded with url: %s" % url - if reason: - message += " (Caused by %s: %s)" % (type(reason), reason) - else: - message += " (Caused by redirect)" + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) RequestError.__init__(self, pool, url, message) @@ -111,7 +123,12 @@ class ClosedPoolError(PoolError): pass -class LocationParseError(ValueError, HTTPError): +class LocationValueError(ValueError, HTTPError): + "Raised when there is something wrong with a given URL input." + pass + + +class LocationParseError(LocationValueError): "Raised when get_host or similar fails to parse the URL input." def __init__(self, location): @@ -119,3 +136,24 @@ def __init__(self, location): HTTPError.__init__(self, message) self.location = location + + +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + +class SecurityWarning(HTTPWarning): + "Warned when perfoming security reducing actions" + pass + + +class InsecureRequestWarning(SecurityWarning): + "Warned when making an unverified HTTPS request." + pass + + +class SystemTimeWarning(SecurityWarning): + "Warned when system time is suspected to be wrong" + pass diff --git a/botocore/vendored/requests/packages/urllib3/fields.py b/botocore/vendored/requests/packages/urllib3/fields.py index ed017657a2..c853f8d56b 100644 --- a/botocore/vendored/requests/packages/urllib3/fields.py +++ b/botocore/vendored/requests/packages/urllib3/fields.py @@ -1,9 +1,3 @@ -# urllib3/fields.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import email.utils import mimetypes @@ -15,7 +9,7 @@ def guess_content_type(filename, default='application/octet-stream'): Guess the "Content-Type" of a file. :param filename: - The filename to guess the "Content-Type" of using :mod:`mimetimes`. + The filename to guess the "Content-Type" of using :mod:`mimetypes`. :param default: If no "Content-Type" can be guessed, default to `default`. """ @@ -78,9 +72,10 @@ def from_tuples(cls, fieldname, value): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. - Supports constructing :class:`~urllib3.fields.RequestField` from parameter - of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) - tuple where the MIME type is optional. For example: :: + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example:: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), @@ -125,8 +120,8 @@ def _render_parts(self, header_parts): 'Content-Disposition' fields. :param header_parts: - A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as - `k1="v1"; k2="v2"; ...`. + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. """ parts = [] iterable = header_parts @@ -158,7 +153,8 @@ def render_headers(self): lines.append('\r\n') return '\r\n'.join(lines) - def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): """ Makes this request field into a multipart request field. @@ -172,6 +168,10 @@ def make_multipart(self, content_disposition=None, content_type=None, content_lo """ self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) self.headers['Content-Type'] = content_type self.headers['Content-Location'] = content_location diff --git a/botocore/vendored/requests/packages/urllib3/filepost.py b/botocore/vendored/requests/packages/urllib3/filepost.py index 4575582e91..0fbf488dfe 100644 --- a/botocore/vendored/requests/packages/urllib3/filepost.py +++ b/botocore/vendored/requests/packages/urllib3/filepost.py @@ -1,11 +1,4 @@ -# urllib3/filepost.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import codecs -import mimetypes from uuid import uuid4 from io import BytesIO @@ -38,24 +31,23 @@ def iter_field_objects(fields): i = iter(fields) for field in i: - if isinstance(field, RequestField): - yield field - else: - yield RequestField.from_tuples(*field) + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): """ - Iterate over fields. + .. deprecated:: 1.6 - .. deprecated :: + Iterate over fields. - The addition of `~urllib3.fields.RequestField` makes this function - obsolete. Instead, use :func:`iter_field_objects`, which returns - `~urllib3.fields.RequestField` objects, instead. + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. Supports list of (k, v) tuples and dicts. - """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) diff --git a/botocore/vendored/requests/packages/urllib3/packages/ordered_dict.py b/botocore/vendored/requests/packages/urllib3/packages/ordered_dict.py index 7f8ee15436..4479363cc4 100644 --- a/botocore/vendored/requests/packages/urllib3/packages/ordered_dict.py +++ b/botocore/vendored/requests/packages/urllib3/packages/ordered_dict.py @@ -2,7 +2,6 @@ # Passes Python2.7's test suite and incorporates all the latest updates. # Copyright 2009 Raymond Hettinger, released under the MIT License. # http://code.activestate.com/recipes/576693/ - try: from thread import get_ident as _get_ident except ImportError: diff --git a/botocore/vendored/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/botocore/vendored/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 2d61ac2139..dd59a75fd3 100644 --- a/botocore/vendored/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/botocore/vendored/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -1,98 +1,13 @@ -"""The match_hostname() function from Python 3.2, essential when using SSL.""" - -import re - -__version__ = '3.2.2' - -class CertificateError(ValueError): - pass - -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 - - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False - - parts = dn.split(r'.') - leftmost = parts[0] - - wildcards = leftmost.count('*') - if wildcards > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() - - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - - # add the remaining fragments, ignore any wildcards - for frag in parts[1:]: - pats.append(re.escape(frag)) - - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") +try: + # Python 3.2+ + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from ._implementation import CertificateError, match_hostname + +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/botocore/vendored/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py b/botocore/vendored/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 0000000000..52f428733d --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,105 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html + +import re + +__version__ = '3.4.0.2' + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/botocore/vendored/requests/packages/urllib3/poolmanager.py b/botocore/vendored/requests/packages/urllib3/poolmanager.py index e7f8667ee6..515dc96219 100644 --- a/botocore/vendored/requests/packages/urllib3/poolmanager.py +++ b/botocore/vendored/requests/packages/urllib3/poolmanager.py @@ -1,9 +1,3 @@ -# urllib3/poolmanager.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import logging try: # Python 3 @@ -14,8 +8,10 @@ from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme +from .exceptions import LocationValueError from .request import RequestMethods -from .util import parse_url +from .util.url import parse_url +from .util.retry import Retry __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -49,7 +45,7 @@ class PoolManager(RequestMethods): Additional parameters are used to create fresh :class:`urllib3.connectionpool.ConnectionPool` instances. - Example: :: + Example:: >>> manager = PoolManager(num_pools=2) >>> r = manager.request('GET', 'http://google.com/') @@ -102,10 +98,11 @@ def connection_from_host(self, host, port=None, scheme='http'): ``urllib3.connectionpool.port_by_scheme``. """ - scheme = scheme or 'http' + if not host: + raise LocationValueError("No host specified.") + scheme = scheme or 'http' port = port or port_by_scheme.get(scheme, 80) - pool_key = (scheme, host, port) with self.pools.lock: @@ -118,6 +115,7 @@ def connection_from_host(self, host, port=None, scheme='http'): # Make a fresh ConnectionPool of the desired type pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool + return pool def connection_from_url(self, url): @@ -161,13 +159,18 @@ def urlopen(self, method, url, redirect=True, **kw): # Support relative URLs for redirecting. redirect_location = urljoin(url, redirect_location) - # RFC 2616, Section 10.3.4 + # RFC 7231, Section 6.4.4 if response.status == 303: method = 'GET' - log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + kw['retries'] = retries.increment(method, redirect_location) kw['redirect'] = redirect + + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, **kw) @@ -176,7 +179,7 @@ class ProxyManager(PoolManager): Behaves just like :class:`PoolManager`, but sends all requests through the defined proxy, using the CONNECT method for HTTPS URLs. - :param poxy_url: + :param proxy_url: The URL of the proxy to be used. :param proxy_headers: @@ -208,12 +211,16 @@ def __init__(self, proxy_url, num_pools=10, headers=None, if not proxy.port: port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) + + assert proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % proxy.scheme + self.proxy = proxy self.proxy_headers = proxy_headers or {} - assert self.proxy.scheme in ("http", "https"), \ - 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( num_pools, headers, **connection_pool_kw) @@ -245,14 +252,13 @@ def urlopen(self, method, url, redirect=True, **kw): u = parse_url(url) if u.scheme == "http": - # It's too late to set proxy headers on per-request basis for - # tunnelled HTTPS connections, should use - # constructor's proxy_headers instead. - kw['headers'] = self._set_proxy_headers(url, kw.get('headers', - self.headers)) - kw['headers'].update(self.proxy_headers) - - return super(ProxyManager, self).urlopen(method, url, redirect, **kw) + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. + headers = kw.get('headers', self.headers) + kw['headers'] = self._set_proxy_headers(url, headers) + + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) def proxy_from_url(url, **kw): diff --git a/botocore/vendored/requests/packages/urllib3/request.py b/botocore/vendored/requests/packages/urllib3/request.py index 66a9a0e690..b08d6c9274 100644 --- a/botocore/vendored/requests/packages/urllib3/request.py +++ b/botocore/vendored/requests/packages/urllib3/request.py @@ -1,9 +1,3 @@ -# urllib3/request.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - try: from urllib.parse import urlencode except ImportError: @@ -26,8 +20,8 @@ class RequestMethods(object): Specifically, - :meth:`.request_encode_url` is for sending requests whose fields are encoded - in the URL (such as GET, HEAD, DELETE). + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are encoded in the *body* of the request using multipart or www-form-urlencoded @@ -45,14 +39,13 @@ class RequestMethods(object): """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) def __init__(self, headers=None): self.headers = headers or {} def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): # Abstract + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -62,8 +55,8 @@ def request(self, method, url, fields=None, headers=None, **urlopen_kw): ``fields`` based on the ``method`` used. This is a convenience method that requires the least amount of manual - effort. It can be used in most situations, while still having the option - to drop down to more specific methods when necessary, such as + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. """ @@ -71,12 +64,12 @@ def request(self, method, url, fields=None, headers=None, **urlopen_kw): if method in self._encode_url_methods: return self.request_encode_url(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) else: return self.request_encode_body(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) def request_encode_url(self, method, url, fields=None, **urlopen_kw): """ @@ -95,18 +88,18 @@ def request_encode_body(self, method, url, fields=None, headers=None, the body. This is useful for request methods like POST, PUT, PATCH, etc. When ``encode_multipart=True`` (default), then - :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the - payload with the appropriate content type. Otherwise + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise :meth:`urllib.urlencode` is used with the 'application/x-www-form-urlencoded' content type. Multipart encoding must be used when posting files, and it's reasonably - safe to use it in other times too. However, it may break request signing, - such as with OAuth. + safe to use it in other times too. However, it may break request + signing, such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where - the MIME type is optional. For example: :: + the MIME type is optional. For example:: fields = { 'foo': 'bar', @@ -120,23 +113,29 @@ def request_encode_body(self, method, url, fields=None, headers=None, When uploading a file, providing a filename (the first parameter of the tuple) is optional but recommended to best mimick behavior of browsers. - Note that if ``headers`` are supplied, the 'Content-Type' header will be - overwritten because it depends on the dynamic random boundary string + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. """ - if encode_multipart: - body, content_type = encode_multipart_formdata(fields or {}, - boundary=multipart_boundary) - else: - body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') - if headers is None: headers = self.headers - headers_ = {'Content-Type': content_type} - headers_.update(headers) + extra_kw = {'headers': {}} + + if fields: + if 'body' in urlopen_kw: + raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.') + + if encode_multipart: + body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) + else: + body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' + + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) - return self.urlopen(method, url, body=body, headers=headers_, - **urlopen_kw) + return self.urlopen(method, url, **extra_kw) diff --git a/botocore/vendored/requests/packages/urllib3/response.py b/botocore/vendored/requests/packages/urllib3/response.py index 4efff5a13b..e69de95733 100644 --- a/botocore/vendored/requests/packages/urllib3/response.py +++ b/botocore/vendored/requests/packages/urllib3/response.py @@ -1,21 +1,14 @@ -# urllib3/response.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -import logging import zlib import io +from socket import timeout as SocketTimeout -from .exceptions import DecodeError +from ._collections import HTTPHeaderDict +from .exceptions import ProtocolError, DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type -from .util import is_fp_closed +from .connection import HTTPException, BaseSSLError +from .util.response import is_fp_closed -log = logging.getLogger(__name__) - class DeflateDecoder(object): @@ -55,7 +48,10 @@ class HTTPResponse(io.IOBase): HTTP Response container. Backwards-compatible to httplib's HTTPResponse but the response ``body`` is - loaded and decoded on-demand when the ``data`` property is accessed. + loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. Extra parameters for behaviour not present in httplib.HTTPResponse: @@ -79,7 +75,10 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = headers or {} + + self.headers = HTTPHeaderDict() + if headers: + self.headers.update(headers) self.status = status self.version = version self.reason = reason @@ -87,9 +86,13 @@ def __init__(self, body='', headers=None, status=0, version=0, reason=None, self.decode_content = decode_content self._decoder = None - self._body = body if body and isinstance(body, basestring) else None + self._body = None self._fp = None self._original_response = original_response + self._fp_bytes_read = 0 + + if body and isinstance(body, (basestring, binary_type)): + self._body = body self._pool = pool self._connection = connection @@ -129,6 +132,14 @@ def data(self): if self._fp: return self.read(cache_content=True) + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -150,8 +161,8 @@ def read(self, amt=None, decode_content=None, cache_content=False): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 2616 - # Section 3.5 + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 content_encoding = self.headers.get('content-encoding', '').lower() if self._decoder is None: if content_encoding in self.CONTENT_DECODERS: @@ -165,23 +176,44 @@ def read(self, amt=None, decode_content=None, cache_content=False): flush_decoder = False try: - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do not - # properly close the connection in all cases. There is no harm - # in redundantly calling close. - self._fp.close() + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if not 'read operation timed out' in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except HTTPException as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) + + self._fp_bytes_read += len(data) try: if decode_content and self._decoder: @@ -189,8 +221,7 @@ def read(self, amt=None, decode_content=None, cache_content=False): except (IOError, zlib.error) as e: raise DecodeError( "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e) + "failed to decode it." % content_encoding, e) if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) @@ -227,7 +258,6 @@ def stream(self, amt=2**16, decode_content=None): if data: yield data - @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -238,17 +268,9 @@ def from_httplib(ResponseCls, r, **response_kw): with ``original_response=r``. """ - # Normalize headers between different versions of Python - headers = {} + headers = HTTPHeaderDict() for k, v in r.getheaders(): - # Python 3: Header keys are returned capitalised - k = k.lower() - - has_value = headers.get(k) - if has_value: # Python 3: Repeating header keys are unmerged. - v = ', '.join([has_value, v]) - - headers[k] = v + headers.add(k, v) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) @@ -290,7 +312,7 @@ def fileno(self): elif hasattr(self._fp, "fileno"): return self._fp.fileno() else: - raise IOError("The file-like object this HTTPResponse is wrapped " + raise IOError("The file-like object this HTTPResponse is wrapped " "around has no file descriptor") def flush(self): @@ -298,4 +320,14 @@ def flush(self): return self._fp.flush() def readable(self): + # This method is required for `io` module compatibility. return True + + def readinto(self, b): + # This method is required for `io` module compatibility. + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[:len(temp)] = temp + return len(temp) diff --git a/botocore/vendored/requests/packages/urllib3/util.py b/botocore/vendored/requests/packages/urllib3/util.py deleted file mode 100644 index cf934d4246..0000000000 --- a/botocore/vendored/requests/packages/urllib3/util.py +++ /dev/null @@ -1,635 +0,0 @@ -# urllib3/util.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -from base64 import b64encode -from binascii import hexlify, unhexlify -from collections import namedtuple -from hashlib import md5, sha1 -from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT -import time - -try: - from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms - poll = False - try: - from select import select - except ImportError: # `select` doesn't exist on AppEngine. - select = False - -try: # Test for SSL features - SSLContext = None - HAS_SNI = False - - import ssl - from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? - from ssl import HAS_SNI # Has SNI? -except ImportError: - pass - -from .packages import six -from .exceptions import LocationParseError, SSLError, TimeoutStateError - - -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout - - -def current_time(): - """ - Retrieve the current time, this function is mocked out in unit testing. - """ - return time.time() - - -class Timeout(object): - """ - Utility object for storing timeout values. - - Example usage: - - .. code-block:: python - - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc - - :param connect: - The maximum amount of time to wait for a connection attempt to a server - to succeed. Omitting the parameter will default the connect timeout to - the system default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout for connection attempts. - - :type connect: integer, float, or None - - :param read: - The maximum amount of time to wait between consecutive - read operations for a response from the server. Omitting - the parameter will default the read timeout to the system - default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout. - - :type read: integer, float, or None - - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - - .. note:: - - Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. - - In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, - not the total amount of time for the request to return a complete - response. For most requests, the timeout is raised because the server - has not sent the first byte in the specified time. This is not always - the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. - - If your goal is to cut off any request after a set amount of wall clock - time, consider having a second "watcher" thread to cut off a slow - request. - """ - - #: A sentinel object representing the default timeout value - DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - - def __init__(self, total=None, connect=_Default, read=_Default): - self._connect = self._validate_timeout(connect, 'connect') - self._read = self._validate_timeout(read, 'read') - self.total = self._validate_timeout(total, 'total') - self._start_connect = None - - def __str__(self): - return '%s(connect=%r, read=%r, total=%r)' % ( - type(self).__name__, self._connect, self._read, self.total) - - - @classmethod - def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid - - :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero - """ - if value is _Default: - return cls.DEFAULT_TIMEOUT - - if value is None or value is cls.DEFAULT_TIMEOUT: - return value - - try: - float(value) - except (TypeError, ValueError): - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - try: - if value < 0: - raise ValueError("Attempted to set %s timeout to %s, but the " - "timeout cannot be set to a value less " - "than 0." % (name, value)) - except TypeError: # Python 3 - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - return value - - @classmethod - def from_float(cls, timeout): - """ Create a new Timeout from a legacy timeout value. - - The timeout value used by httplib.py sets the same timeout on the - connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. - - :param timeout: The legacy timeout value - :type timeout: integer, float, sentinel default object, or None - :return: a Timeout object - :rtype: :class:`Timeout` - """ - return Timeout(read=timeout, connect=timeout) - - def clone(self): - """ Create a copy of the timeout object - - Timeout properties are stored per-pool but each request needs a fresh - Timeout object to ensure each one has its own start/stop configured. - - :return: a copy of the timeout object - :rtype: :class:`Timeout` - """ - # We can't use copy.deepcopy because that will also create a new object - # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to - # detect the user default. - return Timeout(connect=self._connect, read=self._read, - total=self.total) - - def start_connect(self): - """ Start the timeout clock, used during a connect() attempt - - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to start a timer that has been started already. - """ - if self._start_connect is not None: - raise TimeoutStateError("Timeout timer has already been started.") - self._start_connect = current_time() - return self._start_connect - - def get_connect_duration(self): - """ Gets the time elapsed since the call to :meth:`start_connect`. - - :return: the elapsed time - :rtype: float - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to get duration for a timer that hasn't been started. - """ - if self._start_connect is None: - raise TimeoutStateError("Can't get connect duration for timer " - "that has not started.") - return current_time() - self._start_connect - - @property - def connect_timeout(self): - """ Get the value to use when setting a connection timeout. - - This will be a positive float or integer, the value None - (never timeout), or the default system timeout. - - :return: the connect timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - """ - if self.total is None: - return self._connect - - if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: - return self.total - - return min(self._connect, self.total) - - @property - def read_timeout(self): - """ Get the value for the read timeout. - - This assumes some time has elapsed in the connection timeout and - computes the read timeout appropriately. - - If self.total is set, the read timeout is dependent on the amount of - time taken by the connect timeout. If the connection time has not been - established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be - raised. - - :return: the value to use for the read timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` - has not yet been called on this object. - """ - if (self.total is not None and - self.total is not self.DEFAULT_TIMEOUT and - self._read is not None and - self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. - if self._start_connect is None: - return self._read - return max(0, min(self.total - self.get_connect_duration(), - self._read)) - elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: - return max(0, self.total - self.get_connect_duration()) - else: - return self._read - - -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): - """ - Datastructure for representing an HTTP URL. Used as a return value for - :func:`parse_url`. - """ - slots = () - - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) - - @property - def hostname(self): - """For backwards-compatibility with urlparse. We're nice like that.""" - return self.host - - @property - def request_uri(self): - """Absolute path including the query string.""" - uri = self.path or '/' - - if self.query is not None: - uri += '?' + self.query - - return uri - - @property - def netloc(self): - """Network location including host and port""" - if self.port: - return '%s:%d' % (self.host, self.port) - return self.host - - -def split_first(s, delims): - """ - Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts and the matched delimiter. - - If not found, then the first part is the full input string. - - Example: :: - - >>> split_first('foo/bar?baz', '?/=') - ('foo', 'bar?baz', '/') - >>> split_first('foo/bar?baz', '123') - ('foo/bar?baz', '', None) - - Scales linearly with number of delims. Not ideal for large number of delims. - """ - min_idx = None - min_delim = None - for d in delims: - idx = s.find(d) - if idx < 0: - continue - - if min_idx is None or idx < min_idx: - min_idx = idx - min_delim = d - - if min_idx is None or min_idx < 0: - return s, '', None - - return s[:min_idx], s[min_idx+1:], min_delim - - -def parse_url(url): - """ - Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is - performed to parse incomplete urls. Fields not provided will be None. - - Partly backwards-compatible with :mod:`urlparse`. - - Example: :: - - >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) - >>> parse_url('google.com:80') - Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> parse_url('/foo?bar') - Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) - """ - - # While this code has overlap with stdlib's urlparse, it is much - # simplified for our needs and less annoying. - # Additionally, this implementations does silly things to be optimal - # on CPython. - - scheme = None - auth = None - host = None - port = None - path = None - fragment = None - query = None - - # Scheme - if '://' in url: - scheme, url = url.split('://', 1) - - # Find the earliest Authority Terminator - # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, path_, delim = split_first(url, ['/', '?', '#']) - - if delim: - # Reassemble the path - path = delim + path_ - - # Auth - if '@' in url: - # Last '@' denotes end of auth part - auth, url = url.rsplit('@', 1) - - # IPv6 - if url and url[0] == '[': - host, url = url.split(']', 1) - host += ']' - - # Port - if ':' in url: - _host, port = url.split(':', 1) - - if not host: - host = _host - - if port: - # If given, ports must be integers. - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - port = int(port) - else: - # Blank ports are cool, too. (rfc3986#section-3.2.3) - port = None - - elif not host and url: - host = url - - if not path: - return Url(scheme, auth, host, port, path, query, fragment) - - # Fragment - if '#' in path: - path, fragment = path.split('#', 1) - - # Query - if '?' in path: - path, query = path.split('?', 1) - - return Url(scheme, auth, host, port, path, query, fragment) - - -def get_host(url): - """ - Deprecated. Use :func:`.parse_url` instead. - """ - p = parse_url(url) - return p.scheme or 'http', p.hostname, p.port - - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - return headers - - -def is_connection_dropped(conn): # Platform-specific - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - :class:`httplib.HTTPConnection` object. - - Note: For platforms like AppEngine, this will always return ``False`` to - let the platform handle connection recycling transparently for us. - """ - sock = getattr(conn, 'sock', False) - if not sock: # Platform-specific: AppEngine - return False - - if not poll: - if not select: # Platform-specific: AppEngine - return False - - try: - return select([sock], [], [], 0.0)[0] - except SocketError: - return True - - # This version is better on platforms that support it. - p = poll() - p.register(sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True - - -def resolve_cert_reqs(candidate): - """ - Resolves the argument to a numeric constant, which can be passed to - the wrap_socket function/method from the ssl module. - Defaults to :data:`ssl.CERT_NONE`. - If given a string it is assumed to be the name of the constant in the - :mod:`ssl` module or its abbrevation. - (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. - If it's neither `None` nor a string we assume it is already the numeric - constant which can directly be passed to wrap_socket. - """ - if candidate is None: - return CERT_NONE - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'CERT_' + candidate) - return res - - return candidate - - -def resolve_ssl_version(candidate): - """ - like resolve_cert_reqs - """ - if candidate is None: - return PROTOCOL_SSLv23 - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'PROTOCOL_' + candidate) - return res - - return candidate - - -def assert_fingerprint(cert, fingerprint): - """ - Checks if given fingerprint matches the supplied certificate. - - :param cert: - Certificate as bytes object. - :param fingerprint: - Fingerprint as string of hexdigits, can be interspersed by colons. - """ - - # Maps the length of a digest to a possible hash function producing - # this digest. - hashfunc_map = { - 16: md5, - 20: sha1 - } - - fingerprint = fingerprint.replace(':', '').lower() - - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: - raise SSLError('Fingerprint is of invalid length.') - - # We need encode() here for py32; works on py2 and p33. - fingerprint_bytes = unhexlify(fingerprint.encode()) - - hashfunc = hashfunc_map[digest_length] - - cert_digest = hashfunc(cert).digest() - - if not cert_digest == fingerprint_bytes: - raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(hexlify(fingerprint_bytes), - hexlify(cert_digest))) - -def is_fp_closed(obj): - """ - Checks whether a given file-like object is closed. - - :param obj: - The file-like object to check. - """ - if hasattr(obj, 'fp'): - # Object is a container for another file-like object that gets released - # on exhaustion (e.g. HTTPResponse) - return obj.fp is None - - return obj.closed - - -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError - raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) diff --git a/botocore/vendored/requests/packages/urllib3/util/__init__.py b/botocore/vendored/requests/packages/urllib3/util/__init__.py new file mode 100644 index 0000000000..8becc81433 --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/__init__.py @@ -0,0 +1,24 @@ +# For backwards compatibility, provide imports that used to be here. +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import ( + current_time, + Timeout, +) + +from .retry import Retry +from .url import ( + get_host, + parse_url, + split_first, + Url, +) diff --git a/botocore/vendored/requests/packages/urllib3/util/connection.py b/botocore/vendored/requests/packages/urllib3/util/connection.py new file mode 100644 index 0000000000..2156993a0c --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/connection.py @@ -0,0 +1,97 @@ +import socket +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if sock is False: # Platform-specific: AppEngine + return False + if sock is None: # Connection already closed (such as by httplib). + return True + + if not poll: + if not select: # Platform-specific: AppEngine + return False + + try: + return select([sock], [], [], 0.0)[0] + except socket.error: + return True + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) diff --git a/botocore/vendored/requests/packages/urllib3/util/request.py b/botocore/vendored/requests/packages/urllib3/util/request.py new file mode 100644 index 0000000000..bc64f6b1fb --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/request.py @@ -0,0 +1,71 @@ +from base64 import b64encode + +from ..packages.six import b + +ACCEPT_ENCODING = 'gzip,deflate' + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None, disable_cache=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. + + Example:: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(b(proxy_basic_auth)).decode('utf-8') + + if disable_cache: + headers['cache-control'] = 'no-cache' + + return headers diff --git a/botocore/vendored/requests/packages/urllib3/util/response.py b/botocore/vendored/requests/packages/urllib3/util/response.py new file mode 100644 index 0000000000..45fff55246 --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/response.py @@ -0,0 +1,22 @@ +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + + try: + # Check via the official file-like-object way. + return obj.closed + except AttributeError: + pass + + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). + return obj.fp is None + except AttributeError: + pass + + raise ValueError("Unable to determine whether fp is closed.") diff --git a/botocore/vendored/requests/packages/urllib3/util/retry.py b/botocore/vendored/requests/packages/urllib3/util/retry.py new file mode 100644 index 0000000000..aeaf8a0253 --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/retry.py @@ -0,0 +1,285 @@ +import time +import logging + +from ..exceptions import ( + ConnectTimeoutError, + MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, +) +from ..packages import six + + +log = logging.getLogger(__name__) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + indempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + :param iterable status_forcelist: + A set of HTTP status codes that we should force a retry on. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts. urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.MAX_BACKOFF`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, _observed_errors=0): + + self.total = total + self.connect = connect + self.read = read + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self._observed_errors = _observed_errors # TODO: use .history instead? + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + _observed_errors=self._observed_errors, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + if self._observed_errors <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def sleep(self): + """ Sleep between retry attempts using an exponential backoff. + + By default, the backoff factor is 0 and this method will return + immediately. + """ + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def is_forced_retry(self, method, status_code): + """ Is this method/response retryable? (Based on method/codes whitelists) + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return self.status_forcelist and status_code in self.status_forcelist + + def is_exhausted(self): + """ Are we out of retries? """ + retry_counts = (self.total, self.connect, self.read, self.redirect) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + _observed_errors = self._observed_errors + connect = self.connect + read = self.read + redirect = self.redirect + cause = 'unknown' + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + _observed_errors += 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False: + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + _observed_errors += 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + cause = 'too many redirects' + + else: + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist + _observed_errors += 1 + cause = ResponseError.GENERIC_ERROR + if response and response.status: + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, + _observed_errors=_observed_errors) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error or ResponseError(cause)) + + log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + + return new_retry + + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/botocore/vendored/requests/packages/urllib3/util/ssl_.py b/botocore/vendored/requests/packages/urllib3/util/ssl_.py new file mode 100644 index 0000000000..a788b1b98c --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/ssl_.py @@ -0,0 +1,254 @@ +from binascii import hexlify, unhexlify +from hashlib import md5, sha1 + +from ..exceptions import SSLError + + +SSLContext = None +HAS_SNI = False +create_default_context = None + +import errno +import ssl + +try: # Test for SSL features + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +try: + from ssl import _DEFAULT_CIPHERS +except ImportError: + _DEFAULT_CIPHERS = ( + 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' + 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:ECDH+RC4:' + 'DH+RC4:RSA+RC4:!aNULL:!eNULL:!MD5' + ) + +try: + from ssl import SSLContext # Modern SSL? +except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = sys.version_info >= (2, 7) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, location): + self.ca_certs = location + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None): + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + digest_length, odd = divmod(len(fingerprint), 2) + + if odd or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. + :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or _DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + context.check_hostname = (context.verify_mode == ssl.CERT_REQUIRED) + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None): + """ + All arguments except for server_hostname and ssl_context have the same + meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. + """ + context = ssl_context + if context is None: + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: + raise SSLError(e) + raise + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) diff --git a/botocore/vendored/requests/packages/urllib3/util/timeout.py b/botocore/vendored/requests/packages/urllib3/util/timeout.py new file mode 100644 index 0000000000..ea7027f3f5 --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/timeout.py @@ -0,0 +1,240 @@ +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() + +def current_time(): + """ + Retrieve the current time. This function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ Timeout configuration. + + Timeouts can be defined as a default for a pool:: + + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not trigger, even though the request will take + several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid. + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If the type is not an integer or a float, or if it + is a numeric value less than zero. + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value + passed to this function. + + :param timeout: The legacy timeout value. + :type timeout: integer, float, sentinel default object, or None + :return: Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: Elapsed time. + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: Connect timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: Value to use for the read timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # In case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/botocore/vendored/requests/packages/urllib3/util/url.py b/botocore/vendored/requests/packages/urllib3/util/url.py new file mode 100644 index 0000000000..b2ec834fe7 --- /dev/null +++ b/botocore/vendored/requests/packages/urllib3/util/url.py @@ -0,0 +1,212 @@ +from collections import namedtuple + +from ..exceptions import LocationParseError + + +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] + + +class Url(namedtuple('Url', url_attrs)): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example:: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example:: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + if not url: + # Empty + return Url() + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError(url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port diff --git a/botocore/vendored/requests/sessions.py b/botocore/vendored/requests/sessions.py index cc72f65d9d..4f30696353 100644 --- a/botocore/vendored/requests/sessions.py +++ b/botocore/vendored/requests/sessions.py @@ -12,26 +12,31 @@ from collections import Mapping from datetime import datetime -from .compat import cookielib, OrderedDict, urljoin, urlparse, urlunparse -from .cookies import cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar -from .models import Request, PreparedRequest +from .auth import _basic_auth_str +from .compat import cookielib, OrderedDict, urljoin, urlparse +from .cookies import ( + cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) +from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT from .hooks import default_hooks, dispatch_hook -from .utils import to_key_val_list, default_headers -from .exceptions import TooManyRedirects, InvalidSchema +from .utils import to_key_val_list, default_headers, to_native_string +from .exceptions import ( + TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) +from .packages.urllib3._collections import RecentlyUsedContainer from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter -from .utils import requote_uri, get_environ_proxies, get_netrc_auth +from .utils import ( + requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, + get_auth_from_url +) from .status_codes import codes -REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 - codes.temporary_moved, # 307 -) -DEFAULT_REDIRECT_LIMIT = 30 + +# formerly defined here, reexposed here for backward compatibility +from .models import REDIRECT_STATI + +REDIRECT_CACHE_SIZE = 1000 def merge_setting(request_setting, session_setting, dict_class=OrderedDict): @@ -62,21 +67,48 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): if v is None: del merged_setting[k] + merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None) + return merged_setting +def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict): + """ + Properly merges both requests and session hooks. + + This is necessary because when request_hooks == {'response': []}, the + merge breaks Session hooks entirely. + """ + if session_hooks is None or session_hooks.get('response') == []: + return request_hooks + + if request_hooks is None or request_hooks.get('response') == []: + return session_hooks + + return merge_setting(request_hooks, session_hooks, dict_class) + + class SessionRedirectMixin(object): def resolve_redirects(self, resp, req, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Receives a Response. Returns a generator of Responses.""" i = 0 + hist = [] # keep track of history - # ((resp.status_code is codes.see_other)) - while ('location' in resp.headers and resp.status_code in REDIRECT_STATI): + while resp.is_redirect: prepared_request = req.copy() - resp.content # Consume socket so it can be released + if i > 0: + # Update history and keep track of redirects. + hist.append(resp) + new_hist = list(hist) + resp.history = new_hist + + try: + resp.content # Consume socket so it can be released + except (ChunkedEncodingError, ContentDecodingError, RuntimeError): + resp.raw.read(decode_content=False) if i >= self.max_redirects: raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects) @@ -94,34 +126,40 @@ def resolve_redirects(self, resp, req, stream=False, timeout=None, # The scheme should be lower case... parsed = urlparse(url) - parsed = (parsed.scheme.lower(), parsed.netloc, parsed.path, - parsed.params, parsed.query, parsed.fragment) - url = urlunparse(parsed) + url = parsed.geturl() - # Facilitate non-RFC2616-compliant 'location' headers + # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. - if not urlparse(url).netloc: + if not parsed.netloc: url = urljoin(resp.url, requote_uri(url)) else: url = requote_uri(url) - prepared_request.url = url + prepared_request.url = to_native_string(url) + # Cache the url, unless it redirects to itself. + if resp.is_permanent_redirect and req.url != prepared_request.url: + self.redirect_cache[req.url] = prepared_request.url - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 + # http://tools.ietf.org/html/rfc7231#section-6.4.4 if (resp.status_code == codes.see_other and method != 'HEAD'): method = 'GET' # Do what the browsers do, despite standards... - if (resp.status_code in (codes.moved, codes.found) and - method not in ('GET', 'HEAD')): + # First, turn 302s into GETs. + if resp.status_code == codes.found and method != 'HEAD': + method = 'GET' + + # Second, if a POST is responded to with a 301, turn it into a GET. + # This bizarre behaviour is explained in Issue 1704. + if resp.status_code == codes.moved and method == 'POST': method = 'GET' prepared_request.method = method # https://github.com/kennethreitz/requests/issues/1084 - if resp.status_code not in (codes.temporary, codes.resume): + if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): if 'Content-Length' in prepared_request.headers: del prepared_request.headers['Content-Length'] @@ -133,10 +171,19 @@ def resolve_redirects(self, resp, req, stream=False, timeout=None, except KeyError: pass - prepared_request.prepare_cookies(self.cookies) + extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw) + prepared_request._cookies.update(self.cookies) + prepared_request.prepare_cookies(prepared_request._cookies) + + # Rebuild auth and proxy information. + proxies = self.rebuild_proxies(prepared_request, proxies) + self.rebuild_auth(prepared_request, resp) + + # Override the original request. + req = prepared_request resp = self.send( - prepared_request, + req, stream=stream, timeout=timeout, verify=verify, @@ -150,6 +197,68 @@ def resolve_redirects(self, resp, req, stream=False, timeout=None, i += 1 yield resp + def rebuild_auth(self, prepared_request, response): + """ + When being redirected we may want to strip authentication from the + request to avoid leaking credentials. This method intelligently removes + and reapplies authentication where possible to avoid credential loss. + """ + headers = prepared_request.headers + url = prepared_request.url + + if 'Authorization' in headers: + # If we get redirected to a new host, we should strip out any + # authentication headers. + original_parsed = urlparse(response.request.url) + redirect_parsed = urlparse(url) + + if (original_parsed.hostname != redirect_parsed.hostname): + del headers['Authorization'] + + # .netrc might have more auth for us on our new host. + new_auth = get_netrc_auth(url) if self.trust_env else None + if new_auth is not None: + prepared_request.prepare_auth(new_auth) + + return + + def rebuild_proxies(self, prepared_request, proxies): + """ + This method re-evaluates the proxy configuration by considering the + environment variables. If we are redirected to a URL covered by + NO_PROXY, we strip the proxy configuration. Otherwise, we set missing + proxy keys for this URL (in case they were stripped by a previous + redirect). + + This method also replaces the Proxy-Authorization header where + necessary. + """ + headers = prepared_request.headers + url = prepared_request.url + scheme = urlparse(url).scheme + new_proxies = proxies.copy() if proxies is not None else {} + + if self.trust_env and not should_bypass_proxies(url): + environ_proxies = get_environ_proxies(url) + + proxy = environ_proxies.get(scheme) + + if proxy: + new_proxies.setdefault(scheme, environ_proxies[scheme]) + + if 'Proxy-Authorization' in headers: + del headers['Proxy-Authorization'] + + try: + username, password = get_auth_from_url(new_proxies[scheme]) + except KeyError: + username, password = None, None + + if username and password: + headers['Proxy-Authorization'] = _basic_auth_str(username, password) + + return new_proxies + class Session(SessionRedirectMixin): """A Requests session. @@ -165,9 +274,10 @@ class Session(SessionRedirectMixin): """ __attrs__ = [ - 'headers', 'cookies', 'auth', 'timeout', 'proxies', 'hooks', - 'params', 'verify', 'cert', 'prefetch', 'adapters', 'stream', - 'trust_env', 'max_redirects'] + 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', + 'cert', 'prefetch', 'adapters', 'stream', 'trust_env', + 'max_redirects', + ] def __init__(self): @@ -220,6 +330,9 @@ def __init__(self): self.mount('https://', HTTPAdapter()) self.mount('http://', HTTPAdapter()) + # Only store 1000 redirects to prevent using infinite memory + self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE) + def __enter__(self): return self @@ -233,7 +346,7 @@ def prepare_request(self, request): :class:`Session`. :param request: :class:`Request` instance to prepare with this - session's settings. + session's settings. """ cookies = request.cookies or {} @@ -242,9 +355,8 @@ def prepare_request(self, request): cookies = cookiejar_from_dict(cookies) # Merge with session cookies - merged_cookies = RequestsCookieJar() - merged_cookies.update(self.cookies) - merged_cookies.update(cookies) + merged_cookies = merge_cookies( + merge_cookies(RequestsCookieJar(), self.cookies), cookies) # Set environment's basic authentication if not explicitly set. @@ -258,11 +370,12 @@ def prepare_request(self, request): url=request.url, files=request.files, data=request.data, + json=request.json, headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), params=merge_setting(request.params, self.params), auth=merge_setting(auth, self.auth), cookies=merged_cookies, - hooks=merge_setting(request.hooks, self.hooks), + hooks=merge_hooks(request.hooks, self.hooks), ) return p @@ -279,7 +392,8 @@ def request(self, method, url, hooks=None, stream=None, verify=None, - cert=None): + cert=None, + json=None): """Constructs a :class:`Request `, prepares it and sends it. Returns :class:`Response ` object. @@ -289,17 +403,22 @@ def request(self, method, url, string for the :class:`Request`. :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. + :param json: (optional) json to send in the body of the + :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param files: (optional) Dictionary of 'filename': file-like-objects + :param files: (optional) Dictionary of ``'filename': file-like-objects`` for multipart encoding upload. :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the - request. - :param allow_redirects: (optional) Boolean. Set to True by default. + :param timeout: (optional) How long to wait for the server to send + data before giving up, as a float, or a (`connect timeout, read + timeout `_) tuple. + :type timeout: float or tuple + :param allow_redirects: (optional) Set to True by default. + :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param stream: (optional) whether to immediately download the response @@ -309,6 +428,9 @@ def request(self, method, url, :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. """ + + method = to_native_string(method) + # Create the Request. req = Request( method = method.upper(), @@ -316,6 +438,7 @@ def request(self, method, url, headers = headers, files = files, data = data or {}, + json = json, params = params or {}, auth = auth, cookies = cookies, @@ -323,41 +446,18 @@ def request(self, method, url, ) prep = self.prepare_request(req) - # Add param cookies to session cookies - self.cookies = cookiejar_from_dict(cookies, cookiejar=self.cookies, overwrite=False) - proxies = proxies or {} - # Gather clues from the surrounding environment. - if self.trust_env: - # Set environment's proxies. - env_proxies = get_environ_proxies(url) or {} - for (k, v) in env_proxies.items(): - proxies.setdefault(k, v) - - # Look for configuration. - if not verify and verify is not False: - verify = os.environ.get('REQUESTS_CA_BUNDLE') - - # Curl compatibility. - if not verify and verify is not False: - verify = os.environ.get('CURL_CA_BUNDLE') - - # Merge all the kwargs. - proxies = merge_setting(proxies, self.proxies) - stream = merge_setting(stream, self.stream) - verify = merge_setting(verify, self.verify) - cert = merge_setting(cert, self.cert) + settings = self.merge_environment_settings( + prep.url, proxies, stream, verify, cert + ) # Send the request. send_kwargs = { - 'stream': stream, 'timeout': timeout, - 'verify': verify, - 'cert': cert, - 'proxies': proxies, 'allow_redirects': allow_redirects, } + send_kwargs.update(settings) resp = self.send(prep, **send_kwargs) return resp @@ -392,15 +492,16 @@ def head(self, url, **kwargs): kwargs.setdefault('allow_redirects', False) return self.request('HEAD', url, **kwargs) - def post(self, url, data=None, **kwargs): + def post(self, url, data=None, json=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. """ - return self.request('POST', url, data=data, **kwargs) + return self.request('POST', url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): """Sends a PUT request. Returns :class:`Response` object. @@ -445,8 +546,15 @@ def send(self, request, **kwargs): if not isinstance(request, PreparedRequest): raise ValueError('You can only send PreparedRequests.') - # Set up variables needed for resolve_redirects and dispatching of - # hooks + checked_urls = set() + while request.url in self.redirect_cache: + checked_urls.add(request.url) + new_url = self.redirect_cache.get(request.url) + if new_url in checked_urls: + break + request.url = new_url + + # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') timeout = kwargs.get('timeout') @@ -460,8 +568,10 @@ def send(self, request, **kwargs): # Start time (approximately) of the request start = datetime.utcnow() + # Send the request r = adapter.send(request, **kwargs) + # Total elapsed time of the request (approximately) r.elapsed = datetime.utcnow() - start @@ -470,15 +580,20 @@ def send(self, request, **kwargs): # Persist cookies if r.history: + # If the hooks create history then we want those cookies too for resp in r.history: extract_cookies_to_jar(self.cookies, resp.request, resp.raw) + extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. - gen = self.resolve_redirects(r, request, stream=stream, - timeout=timeout, verify=verify, cert=cert, - proxies=proxies) + gen = self.resolve_redirects(r, request, + stream=stream, + timeout=timeout, + verify=verify, + cert=cert, + proxies=proxies) # Resolve redirects if allowed. history = [resp for resp in gen] if allow_redirects else [] @@ -489,10 +604,37 @@ def send(self, request, **kwargs): history.insert(0, r) # Get the last request made r = history.pop() - r.history = tuple(history) + r.history = history + + if not stream: + r.content return r + def merge_environment_settings(self, url, proxies, stream, verify, cert): + """Check the environment and merge it with some settings.""" + # Gather clues from the surrounding environment. + if self.trust_env: + # Set environment's proxies. + env_proxies = get_environ_proxies(url) or {} + for (k, v) in env_proxies.items(): + proxies.setdefault(k, v) + + # Look for requests environment configuration and be compatible + # with cURL. + if verify is True or verify is None: + verify = (os.environ.get('REQUESTS_CA_BUNDLE') or + os.environ.get('CURL_CA_BUNDLE')) + + # Merge all the kwargs. + proxies = merge_setting(proxies, self.proxies) + stream = merge_setting(stream, self.stream) + verify = merge_setting(verify, self.verify) + cert = merge_setting(cert, self.cert) + + return {'verify': verify, 'proxies': proxies, 'stream': stream, + 'cert': cert} + def get_adapter(self, url): """Returns the appropriate connnection adapter for the given URL.""" for (prefix, adapter) in self.adapters.items(): @@ -512,18 +654,27 @@ def mount(self, prefix, adapter): """Registers a connection adapter to a prefix. Adapters are sorted in descending order by key length.""" + self.adapters[prefix] = adapter keys_to_move = [k for k in self.adapters if len(k) < len(prefix)] + for key in keys_to_move: self.adapters[key] = self.adapters.pop(key) def __getstate__(self): - return dict((attr, getattr(self, attr, None)) for attr in self.__attrs__) + state = dict((attr, getattr(self, attr, None)) for attr in self.__attrs__) + state['redirect_cache'] = dict(self.redirect_cache) + return state def __setstate__(self, state): + redirect_cache = state.pop('redirect_cache', {}) for attr, value in state.items(): setattr(self, attr, value) + self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE) + for redirect, to in redirect_cache.items(): + self.redirect_cache[redirect] = to + def session(): """Returns a :class:`Session` for context-management.""" diff --git a/botocore/vendored/requests/status_codes.py b/botocore/vendored/requests/status_codes.py index ed7a8660a6..e0887f210a 100644 --- a/botocore/vendored/requests/status_codes.py +++ b/botocore/vendored/requests/status_codes.py @@ -30,7 +30,8 @@ 305: ('use_proxy',), 306: ('switch_proxy',), 307: ('temporary_redirect', 'temporary_moved', 'temporary'), - 308: ('resume_incomplete', 'resume'), + 308: ('permanent_redirect', + 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 # Client Error. 400: ('bad_request', 'bad'), diff --git a/botocore/vendored/requests/structures.py b/botocore/vendored/requests/structures.py index a1759137aa..3e5f2faa2e 100644 --- a/botocore/vendored/requests/structures.py +++ b/botocore/vendored/requests/structures.py @@ -8,30 +8,7 @@ """ -import os import collections -from itertools import islice - - -class IteratorProxy(object): - """docstring for IteratorProxy""" - def __init__(self, i): - self.i = i - # self.i = chain.from_iterable(i) - - def __iter__(self): - return self.i - - def __len__(self): - if hasattr(self.i, '__len__'): - return len(self.i) - if hasattr(self.i, 'len'): - return self.i.len - if hasattr(self.i, 'fileno'): - return os.fstat(self.i.fileno()).st_size - - def read(self, n): - return "".join(islice(self.i, None, n)) class CaseInsensitiveDict(collections.MutableMapping): @@ -46,7 +23,7 @@ class CaseInsensitiveDict(collections.MutableMapping): case of the last key to be set, and ``iter(instance)``, ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()`` will contain case-sensitive keys. However, querying and contains - testing is case insensitive: + testing is case insensitive:: cid = CaseInsensitiveDict() cid['Accept'] = 'application/json' @@ -106,8 +83,7 @@ def copy(self): return CaseInsensitiveDict(self._store.values()) def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, dict(self.items())) - + return str(dict(self.items())) class LookupDict(dict): """Dictionary lookup object.""" diff --git a/botocore/vendored/requests/utils.py b/botocore/vendored/requests/utils.py index 5000105a65..7467941447 100644 --- a/botocore/vendored/requests/utils.py +++ b/botocore/vendored/requests/utils.py @@ -17,16 +17,18 @@ import platform import re import sys -from netrc import netrc, NetrcParseError +import socket +import struct +import warnings from . import __version__ from . import certs from .compat import parse_http_list as _parse_list_header -from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, - is_py2, is_py3, builtin_str, getproxies, proxy_bypass) +from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, + builtin_str, getproxies, proxy_bypass, urlunparse) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict -from .exceptions import MissingSchema, InvalidURL +from .exceptions import InvalidURL _hush_pyflakes = (RequestsCookieJar,) @@ -60,23 +62,34 @@ def super_len(o): return os.fstat(fileno).st_size if hasattr(o, 'getvalue'): - # e.g. BytesIO, cStringIO.StringI + # e.g. BytesIO, cStringIO.StringIO return len(o.getvalue()) + def get_netrc_auth(url): """Returns the Requests tuple auth for a given url from netrc.""" try: - locations = (os.path.expanduser('~/{0}'.format(f)) for f in NETRC_FILES) + from netrc import netrc, NetrcParseError + netrc_path = None - for loc in locations: - if os.path.exists(loc) and not netrc_path: + for f in NETRC_FILES: + try: + loc = os.path.expanduser('~/{0}'.format(f)) + except KeyError: + # os.path.expanduser can fail when $HOME is undefined and + # getpwuid fails. See http://bugs.python.org/issue20164 & + # https://github.com/kennethreitz/requests/issues/1846 + return + + if os.path.exists(loc): netrc_path = loc + break # Abort early if there isn't one. if netrc_path is None: - return netrc_path + return ri = urlparse(url) @@ -102,7 +115,7 @@ def get_netrc_auth(url): def guess_filename(obj): """Tries to guess the filename of the given object.""" name = getattr(obj, 'name', None) - if name and name[0] != '<' and name[-1] != '>': + if name and isinstance(name, builtin_str) and name[0] != '<' and name[-1] != '>': return os.path.basename(name) @@ -275,6 +288,11 @@ def get_encodings_from_content(content): :param content: bytestring to extract encodings from. """ + warnings.warn(( + 'In requests 3.0, get_encodings_from_content will be removed. For ' + 'more information, please see the discussion on issue #2266. (This' + ' warning should only appear once.)'), + DeprecationWarning) charset_re = re.compile(r']', flags=re.I) pragma_re = re.compile(r']', flags=re.I) @@ -339,12 +357,14 @@ def get_unicode_from_response(r): Tried: 1. charset from content-type - - 2. every encodings from ```` - - 3. fall back and replace all unicode characters + 2. fall back and replace all unicode characters """ + warnings.warn(( + 'In requests 3.0, get_unicode_from_response will be removed. For ' + 'more information, please see the discussion on issue #2266. (This' + ' warning should only appear once.)'), + DeprecationWarning) tried_encodings = [] @@ -404,9 +424,60 @@ def requote_uri(uri): return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") -def get_environ_proxies(url): - """Return a dict of environment proxies.""" +def address_in_network(ip, net): + """ + This function allows you to check if on IP belongs to a network subnet + Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24 + returns False if ip = 192.168.1.1 and net = 192.168.100.0/24 + """ + ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0] + netaddr, bits = net.split('/') + netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0] + network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask + return (ipaddr & netmask) == (network & netmask) + + +def dotted_netmask(mask): + """ + Converts mask from /xx format to xxx.xxx.xxx.xxx + Example: if mask is 24 function returns 255.255.255.0 + """ + bits = 0xffffffff ^ (1 << 32 - mask) - 1 + return socket.inet_ntoa(struct.pack('>I', bits)) + + +def is_ipv4_address(string_ip): + try: + socket.inet_aton(string_ip) + except socket.error: + return False + return True + + +def is_valid_cidr(string_network): + """Very simple check of the cidr format in no_proxy variable""" + if string_network.count('/') == 1: + try: + mask = int(string_network.split('/')[1]) + except ValueError: + return False + + if mask < 1 or mask > 32: + return False + + try: + socket.inet_aton(string_network.split('/')[0]) + except socket.error: + return False + else: + return False + return True + +def should_bypass_proxies(url): + """ + Returns whether we should bypass proxies or not. + """ get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL @@ -419,24 +490,44 @@ def get_environ_proxies(url): # the end of the netloc, both with and without the port. no_proxy = no_proxy.replace(' ', '').split(',') - for host in no_proxy: - if netloc.endswith(host) or netloc.split(':')[0].endswith(host): - # The URL does match something in no_proxy, so we don't want - # to apply the proxies on this URL. - return {} + ip = netloc.split(':')[0] + if is_ipv4_address(ip): + for proxy_ip in no_proxy: + if is_valid_cidr(proxy_ip): + if address_in_network(ip, proxy_ip): + return True + else: + for host in no_proxy: + if netloc.endswith(host) or netloc.split(':')[0].endswith(host): + # The URL does match something in no_proxy, so we don't want + # to apply the proxies on this URL. + return True # If the system proxy settings indicate that this URL should be bypassed, # don't proxy. - if proxy_bypass(netloc): - return {} + # The proxy_bypass function is incredibly buggy on OS X in early versions + # of Python 2.6, so allow this call to fail. Only catch the specific + # exceptions we've seen, though: this call failing in other ways can reveal + # legitimate problems. + try: + bypass = proxy_bypass(netloc) + except (TypeError, socket.gaierror): + bypass = False - # If we get here, we either didn't have no_proxy set or we're not going - # anywhere that no_proxy applies to, and the system settings don't require - # bypassing the proxy for the current URL. - return getproxies() + if bypass: + return True + return False -def default_user_agent(): +def get_environ_proxies(url): + """Return a dict of environment proxies.""" + if should_bypass_proxies(url): + return {} + else: + return getproxies() + + +def default_user_agent(name="python-requests"): """Return a string representing the default user agent.""" _implementation = platform.python_implementation() @@ -462,7 +553,7 @@ def default_user_agent(): p_system = 'Unknown' p_release = 'Unknown' - return " ".join(['python-requests/%s' % __version__, + return " ".join(['%s/%s' % (name, __version__), '%s/%s' % (_implementation, _implementation_version), '%s/%s' % (p_system, p_release)]) @@ -470,8 +561,9 @@ def default_user_agent(): def default_headers(): return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), - 'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')), - 'Accept': '*/*' + 'Accept-Encoding': ', '.join(('gzip', 'deflate')), + 'Accept': '*/*', + 'Connection': 'keep-alive', }) @@ -486,7 +578,7 @@ def parse_header_links(value): replace_chars = " '\"" - for val in value.split(","): + for val in re.split(", *<", value): try: url, params = val.split(";", 1) except ValueError: @@ -544,23 +636,31 @@ def guess_json_utf(data): return None -def except_on_missing_scheme(url): - """Given a URL, raise a MissingSchema exception if the scheme is missing. - """ - scheme, netloc, path, params, query, fragment = urlparse(url) +def prepend_scheme_if_needed(url, new_scheme): + '''Given a URL that may or may not have a scheme, prepend the given scheme. + Does not replace a present scheme with the one provided as an argument.''' + scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) - if not scheme: - raise MissingSchema('Proxy URLs must have explicit schemes.') + # urlparse is a finicky beast, and sometimes decides that there isn't a + # netloc present. Assume that it's being over-cautious, and switch netloc + # and path if urlparse decided there was no netloc. + if not netloc: + netloc, path = path, netloc + + return urlunparse((scheme, netloc, path, params, query, fragment)) def get_auth_from_url(url): """Given a url with authentication components, extract them into a tuple of username,password.""" - if url: - parsed = urlparse(url) - return (parsed.username, parsed.password) - else: - return ('', '') + parsed = urlparse(url) + + try: + auth = (unquote(parsed.username), unquote(parsed.password)) + except (AttributeError, TypeError): + auth = ('', '') + + return auth def to_native_string(string, encoding='ascii'): @@ -580,3 +680,18 @@ def to_native_string(string, encoding='ascii'): out = string.decode(encoding) return out + + +def urldefragauth(url): + """ + Given a url remove the fragment and the authentication part + """ + scheme, netloc, path, params, query, fragment = urlparse(url) + + # see func:`prepend_scheme_if_needed` + if not netloc: + netloc, path = path, netloc + + netloc = netloc.rsplit('@', 1)[-1] + + return urlunparse((scheme, netloc, path, params, query, '')) diff --git a/tests/unit/test_endpoint.py b/tests/unit/test_endpoint.py index 90cce94a09..a81bcca5a5 100644 --- a/tests/unit/test_endpoint.py +++ b/tests/unit/test_endpoint.py @@ -18,8 +18,10 @@ from botocore.vendored.requests.models import Response from botocore.compat import six +from botocore.awsrequest import AWSRequest from botocore.endpoint import get_endpoint, Endpoint, DEFAULT_TIMEOUT from botocore.endpoint import EndpointCreator +from botocore.endpoint import PreserveAuthSession from botocore.auth import SigV4Auth from botocore.session import Session from botocore.exceptions import UnknownServiceStyle @@ -318,3 +320,39 @@ def test_endpoint_resolver_uses_credential_scope(self): Mock(), 'user-agent') endpoint = creator.create_endpoint(self.service_model) self.assertEqual(endpoint.region_name, 'us-east-1') + + +class TestAWSSession(unittest.TestCase): + def test_auth_header_preserved_from_s3_redirects(self): + request = AWSRequest() + request.url = 'https://bucket.s3.amazonaws.com/' + request.method = 'GET' + request.headers['Authorization'] = 'original auth header' + prepared_request = request.prepare() + + fake_response = Mock() + fake_response.headers = { + 'location': 'https://bucket.s3-us-west-2.amazonaws.com'} + fake_response.url = request.url + fake_response.status_code = 307 + fake_response.is_permanent_redirect = False + # This line is needed to disable the cookie handling + # code in requests. + fake_response.raw._original_response = None + + success_response = Mock() + success_response.raw._original_response = None + success_response.is_redirect = False + success_response.status_code = 200 + session = PreserveAuthSession() + session.send = Mock(return_value=success_response) + + responses = list(session.resolve_redirects( + fake_response, prepared_request, stream=False)) + + redirected_request = session.send.call_args[0][0] + # The Authorization header for the newly sent request should + # still have our original Authorization header. + self.assertEqual( + redirected_request.headers['Authorization'], + 'original auth header')