python-stdnum branch master updated. 1.9-3-g04f78fb
[Date Prev][Date Next]
[Thread Prev][Thread Next]
python-stdnum branch master updated. 1.9-3-g04f78fb
- From: Commits of the python-stdnum project <python-stdnum-commits [at] lists.arthurdejong.org>
- To: python-stdnum-commits [at] lists.arthurdejong.org
- Reply-to: python-stdnum-users [at] lists.arthurdejong.org
- Subject: python-stdnum branch master updated. 1.9-3-g04f78fb
- Date: Tue, 1 May 2018 23:14:38 +0200 (CEST)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "python-stdnum".
The branch, master has been updated
via 04f78fb6613cccbd78e32a569ccf4ec2e5e1d478 (commit)
via bae6f19f265ae6f45c6f42649fd70d1c005added (commit)
via 08d105392029bf430de4a854cf250215ecebf6ba (commit)
from d9defc8b514e5f2d9c545de23054e416bd7bd2ab (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
https://arthurdejong.org/git/python-stdnum/commit/?id=04f78fb6613cccbd78e32a569ccf4ec2e5e1d478
commit 04f78fb6613cccbd78e32a569ccf4ec2e5e1d478
Author: Arthur de Jong <arthur@arthurdejong.org>
Date: Tue May 1 23:04:41 2018 +0200
Fix encoding issues in online check
This ensures that all text is unicode internally and encoded to UTF-8 on
response.
diff --git a/online_check/stdnum.wsgi b/online_check/stdnum.wsgi
index 4ac1c36..f4d430c 100755
--- a/online_check/stdnum.wsgi
+++ b/online_check/stdnum.wsgi
@@ -18,17 +18,17 @@
# 02110-1301 USA
import cgi
+import inspect
import json
import os
import re
import sys
-import inspect
sys.stdout = sys.stderr
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'python-stdnum'))
from stdnum.util import (
- get_number_modules, get_module_name, get_module_description)
+ get_module_description, get_module_name, get_number_modules, to_unicode)
_template = None
@@ -41,11 +41,11 @@ def get_conversions(module, number):
args, varargs, varkw, defaults = inspect.getargspec(func)
if defaults:
args = args[:-len(defaults)]
- if args == ['number']:
+ if args == ['number'] and not name.endswith('binary'):
try:
conversion = func(number)
if conversion != number:
- yield (name[3:], conversion)
+ yield (name[3:], to_unicode(conversion))
except Exception:
pass
@@ -59,8 +59,8 @@ def info(module, number):
compact=compactfn(number),
valid=module.is_valid(number),
module=module.__name__.split('.', 1)[1],
- name=get_module_name(module),
- description=get_module_description(module),
+ name=to_unicode(get_module_name(module)),
+ description=to_unicode(get_module_description(module)),
conversions=dict(get_conversions(module, number)))
@@ -89,26 +89,26 @@ def application(environ, start_response):
basedir = os.path.join(
environ['DOCUMENT_ROOT'],
os.path.dirname(environ['SCRIPT_NAME']).strip('/'))
- _template = open(os.path.join(basedir, 'template.html'), 'r').read()
+ _template = to_unicode(open(os.path.join(basedir, 'template.html'), 'rt').read())
is_ajax = environ.get(
'HTTP_X_REQUESTED_WITH', '').lower() == 'xmlhttprequest'
parameters = cgi.parse_qs(environ.get('QUERY_STRING', ''))
results = []
number = ''
if 'number' in parameters:
- number = parameters['number'][0]
+ number = to_unicode(parameters['number'][0])
results = [
info(module, number)
for module in get_number_modules()
if module.is_valid(number)]
- if 'HTTP_X_REQUESTED_WITH' in environ:
+ if is_ajax:
start_response('200 OK', [
('Content-Type', 'application/json'),
('Vary', 'X-Requested-With')])
return [json.dumps(results, indent=2, sort_keys=True)]
start_response('200 OK', [
- ('Content-Type', 'text/html'),
+ ('Content-Type', 'text/html; charset=utf-8'),
('Vary', 'X-Requested-With')])
- return _template % dict(
+ return [(_template % dict(
value=cgi.escape(number, True),
- results='\n'.join(format(data) for data in results))
+ results=u'\n'.join(format(data) for data in results))).encode('utf-8')]
https://arthurdejong.org/git/python-stdnum/commit/?id=bae6f19f265ae6f45c6f42649fd70d1c005added
commit bae6f19f265ae6f45c6f42649fd70d1c005added
Author: Arthur de Jong <arthur@arthurdejong.org>
Date: Tue May 1 22:54:03 2018 +0200
Fix an issue with format of Mexican tax numbers
Fix an issue where the format accepted a mix of personal and company
numbers in validation, resulting in a raised ValueError exception.
diff --git a/stdnum/mx/rfc.py b/stdnum/mx/rfc.py
index b186950..80b3de2 100644
--- a/stdnum/mx/rfc.py
+++ b/stdnum/mx/rfc.py
@@ -67,14 +67,6 @@ from stdnum.exceptions import *
from stdnum.util import clean, to_unicode
-# regular expression for matching numbers
-_rfc_re = re.compile(u'^[A-Z&Ñ]{3,4}[0-9]{6}[0-9A-Z]{0,5}$')
-
-
-# regular expression for matching the last 3 check digits
-_check_digits_re = re.compile(u'^[1-9A-V][1-9A-Z][0-9A]$')
-
-
# these values should not appear as first part of a personal number
_name_blacklist = set([
'BUEI', 'BUEY', 'CACA', 'CACO', 'CAGA', 'CAGO', 'CAKA', 'CAKO', 'COGE',
@@ -120,20 +112,22 @@ def validate(number, validate_check_digits=False):
"""Check if the number is a valid RFC."""
number = compact(number)
n = to_unicode(number)
- if not _rfc_re.match(n):
- raise InvalidFormat()
if len(n) in (10, 13):
# number assigned to person
+ if not re.match(u'^[A-Z&Ñ]{4}[0-9]{6}[0-9A-Z]{0,3}$', n):
+ raise InvalidFormat()
if n[:4] in _name_blacklist:
raise InvalidComponent()
_get_date(n[4:10])
elif len(n) == 12:
# number assigned to company
+ if not re.match(u'^[A-Z&Ñ]{3}[0-9]{6}[0-9A-Z]{3}$', n):
+ raise InvalidFormat()
_get_date(n[3:9])
else:
raise InvalidLength()
if validate_check_digits and len(n) >= 12:
- if not _check_digits_re.match(n[-3:]):
+ if not re.match(u'^[1-9A-V][1-9A-Z][0-9A]$', n[-3:]):
raise InvalidComponent()
if n[-1] != calc_check_digit(n[:-1]):
raise InvalidChecksum()
diff --git a/tests/test_mx_rfc.doctest b/tests/test_mx_rfc.doctest
index eba887d..7fa5c34 100644
--- a/tests/test_mx_rfc.doctest
+++ b/tests/test_mx_rfc.doctest
@@ -41,6 +41,14 @@ Traceback (most recent call last):
InvalidFormat: ...
+The first four digits should only be letters for 10 or 13-digit numbers.
+
+>>> rfc.validate('ABCD 12345678')
+Traceback (most recent call last):
+ ...
+InvalidFormat: ...
+
+
The first four digits of a personal number should not be one of the
blacklisted words.
https://arthurdejong.org/git/python-stdnum/commit/?id=08d105392029bf430de4a854cf250215ecebf6ba
commit 08d105392029bf430de4a854cf250215ecebf6ba
Author: Arthur de Jong <arthur@arthurdejong.org>
Date: Tue May 1 22:52:38 2018 +0200
Make unicode conversion standard
A few modules use non-ASCII characters in numbers. This introduces a
to_unicode() function in util so that it can be used by multiple
modules.
diff --git a/stdnum/es/referenciacatastral.py b/stdnum/es/referenciacatastral.py
index 129bb16..c4d913c 100644
--- a/stdnum/es/referenciacatastral.py
+++ b/stdnum/es/referenciacatastral.py
@@ -55,7 +55,7 @@ InvalidChecksum: ...
"""
from stdnum.exceptions import *
-from stdnum.util import clean
+from stdnum.util import clean, to_unicode
alphabet = u'ABCDEFGHIJKLMNÑOPQRSTUVWXYZ0123456789'
@@ -89,16 +89,9 @@ def _check_digit(number):
return 'MQWERTYUIOPASDFGHJKLBZX'[s % 23]
-def _force_unicode(number):
- """Convert the number to unicode."""
- if not hasattr(number, 'isnumeric'): # pragma: no cover (Python 2 code)
- number = number.decode('utf-8')
- return number
-
-
def calc_check_digits(number):
"""Calculate the check digits for the number."""
- number = _force_unicode(compact(number))
+ number = to_unicode(compact(number))
return (
_check_digit(number[0:7] + number[14:18]) +
_check_digit(number[7:14] + number[14:18]))
@@ -108,7 +101,7 @@ def validate(number):
"""Check if the number is a valid Cadastral Reference. This checks the
length, formatting and check digits."""
number = compact(number)
- n = _force_unicode(number)
+ n = to_unicode(number)
if not all(c in alphabet for c in n):
raise InvalidFormat()
if len(n) != 20:
diff --git a/stdnum/mx/rfc.py b/stdnum/mx/rfc.py
index 87acbe8..b186950 100644
--- a/stdnum/mx/rfc.py
+++ b/stdnum/mx/rfc.py
@@ -64,15 +64,15 @@ import datetime
import re
from stdnum.exceptions import *
-from stdnum.util import clean
+from stdnum.util import clean, to_unicode
# regular expression for matching numbers
-_rfc_re = re.compile(r'^[A-Z&Ñ]{3,4}[0-9]{6}[0-9A-Z]{0,5}$')
+_rfc_re = re.compile(u'^[A-Z&Ñ]{3,4}[0-9]{6}[0-9A-Z]{0,5}$')
# regular expression for matching the last 3 check digits
-_check_digits_re = re.compile(r'^[1-9A-V][1-9A-Z][0-9A]$')
+_check_digits_re = re.compile(u'^[1-9A-V][1-9A-Z][0-9A]$')
# these values should not appear as first part of a personal number
@@ -86,7 +86,7 @@ _name_blacklist = set([
# characters used for checksum calculation,
-_alphabet = '0123456789ABCDEFGHIJKLMN&OPQRSTUVWXYZ Ñ'
+_alphabet = u'0123456789ABCDEFGHIJKLMN&OPQRSTUVWXYZ Ñ'
def compact(number):
@@ -110,6 +110,7 @@ def _get_date(number):
def calc_check_digit(number):
"""Calculate the check digit. The number passed should not have the
check digit included."""
+ number = to_unicode(number)
number = (' ' + number)[-12:]
check = sum(_alphabet.index(n) * (13 - i) for i, n in enumerate(number))
return _alphabet[(11 - check) % 11]
@@ -118,22 +119,23 @@ def calc_check_digit(number):
def validate(number, validate_check_digits=False):
"""Check if the number is a valid RFC."""
number = compact(number)
- if not _rfc_re.match(number):
+ n = to_unicode(number)
+ if not _rfc_re.match(n):
raise InvalidFormat()
- if len(number) in (10, 13):
+ if len(n) in (10, 13):
# number assigned to person
- if number[:4] in _name_blacklist:
+ if n[:4] in _name_blacklist:
raise InvalidComponent()
- _get_date(number[4:10])
- elif len(number) == 12:
+ _get_date(n[4:10])
+ elif len(n) == 12:
# number assigned to company
- _get_date(number[3:9])
+ _get_date(n[3:9])
else:
raise InvalidLength()
- if validate_check_digits and len(number) >= 12:
- if not _check_digits_re.match(number[-3:]):
+ if validate_check_digits and len(n) >= 12:
+ if not _check_digits_re.match(n[-3:]):
raise InvalidComponent()
- if number[-1] != calc_check_digit(number[:-1]):
+ if n[-1] != calc_check_digit(n[:-1]):
raise InvalidChecksum()
return number
diff --git a/stdnum/util.py b/stdnum/util.py
index 3e04c79..8fa082a 100644
--- a/stdnum/util.py
+++ b/stdnum/util.py
@@ -129,6 +129,16 @@ def clean(number, deletechars=''):
return ''.join(x for x in number if x not in deletechars)
+def to_unicode(text):
+ """Convert the specified text to a unicode string."""
+ if not isinstance(text, type(u'')):
+ try:
+ return text.decode('utf-8')
+ except UnicodeDecodeError:
+ return text.decode('iso-8859-15')
+ return text
+
+
def get_number_modules(base='stdnum'):
"""Yield all the number validation modules under the specified module."""
__import__(base)
diff --git a/tests/test_util.doctest b/tests/test_util.doctest
index c4dc072..959a544 100644
--- a/tests/test_util.doctest
+++ b/tests/test_util.doctest
@@ -24,7 +24,21 @@ meant for internal use by stdnum modules and is not guaranteed to remain
stable and as such not part of the public API of stdnum.
>>> from stdnum.util import (
-... get_number_modules, get_module_name, get_module_description)
+... get_number_modules, get_module_name, get_module_description,
+... to_unicode)
+
+
+The to_unicode() function is used to force conversion of a string to unicode
+if it is not already a unicode string. This is mostly used to convert numbers
+with non-ASCII characters in it.
+
+>>> n_str = b'\xc3\x91'.decode('utf-8') # Ñ character as unicode string
+>>> to_unicode(n_str) == n_str
+True
+>>> to_unicode(n_str.encode('utf-8')) == n_str
+True
+>>> to_unicode(n_str.encode('iso-8859-1')) == n_str
+True
The get_module_name() function is used in the example WSGI application and
-----------------------------------------------------------------------
Summary of changes:
online_check/stdnum.wsgi | 24 ++++++++++++------------
stdnum/es/referenciacatastral.py | 13 +++----------
stdnum/mx/rfc.py | 36 ++++++++++++++++--------------------
stdnum/util.py | 10 ++++++++++
tests/test_mx_rfc.doctest | 8 ++++++++
tests/test_util.doctest | 16 +++++++++++++++-
6 files changed, 64 insertions(+), 43 deletions(-)
hooks/post-receive
--
python-stdnum
--
To unsubscribe send an email to
python-stdnum-commits-unsubscribe@lists.arthurdejong.org or see
https://lists.arthurdejong.org/python-stdnum-commits/
- python-stdnum branch master updated. 1.9-3-g04f78fb,
Commits of the python-stdnum project