python-stdnum branch master updated. 1.10-11-g50874a9
[
Date Prev][
Date Next]
[
Thread Prev][
Thread Next]
python-stdnum branch master updated. 1.10-11-g50874a9
- From: Commits of the python-stdnum project <python-stdnum-commits [at] lists.arthurdejong.org>
- To: python-stdnum-commits [at] lists.arthurdejong.org
- Reply-to: python-stdnum-users [at] lists.arthurdejong.org
- Subject: python-stdnum branch master updated. 1.10-11-g50874a9
- Date: Wed, 6 Feb 2019 23:30:47 +0100 (CET)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "python-stdnum".
The branch, master has been updated
via 50874a93fae06c08be3c3ecd45c091f87ed8c80c (commit)
from 4cb44aa747c9674222fefb4f696e6c7db33260c9 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
https://arthurdejong.org/git/python-stdnum/commit/?id=50874a93fae06c08be3c3ecd45c091f87ed8c80c
commit 50874a93fae06c08be3c3ecd45c091f87ed8c80c
Author: Arthur de Jong <arthur@arthurdejong.org>
Date: Tue Feb 5 22:49:00 2019 +0100
Add Mexican CURP
diff --git a/stdnum/mx/__init__.py b/stdnum/mx/__init__.py
index 8cfc93d..51c4fe4 100644
--- a/stdnum/mx/__init__.py
+++ b/stdnum/mx/__init__.py
@@ -1,7 +1,7 @@
# __init__.py - collection of Mexican numbers
# coding: utf-8
#
-# Copyright (C) 2015 Arthur de Jong
+# Copyright (C) 2015-2019 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -20,5 +20,6 @@
"""Collection of Mexican numbers."""
-# provide vat as an alias
+# provide aliases
+from stdnum.mx import curp as personalid # noqa: F401
from stdnum.mx import rfc as vat # noqa: F401
diff --git a/stdnum/mx/curp.py b/stdnum/mx/curp.py
new file mode 100644
index 0000000..c1ddfd1
--- /dev/null
+++ b/stdnum/mx/curp.py
@@ -0,0 +1,135 @@
+# curp.py - functions for handling Mexican personal identifiers
+# coding: utf-8
+#
+# Copyright (C) 2019 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""CURP (Clave Única de Registro de Población, Mexican personal ID).
+
+The Clave Única de Registro de Población (Population Registry Code) is a unique
+identifier for both citizens and residents of Mexico. It is an 18-character
+alphanumeric code that contains certain letters from the person's name, their
+gender and birth date and a check digit.
+
+More information:
+
+* http://en.wikipedia.org/wiki/CURP
+* https://www.gob.mx/curp/
+
+>>> validate('BOXW310820HNERXN09')
+'BOXW310820HNERXN09'
+>>> validate('BOXW310820HNERXN08')
+Traceback (most recent call last):
+ ...
+InvalidChecksum: ...
+>>> get_birth_date('BOXW310820HNERXN09')
+datetime.date(1931, 8, 20)
+>>> get_gender('BOXW310820HNERXN09')
+'M'
+"""
+
+import datetime
+import re
+
+from stdnum.exceptions import *
+from stdnum.util import clean
+
+
+# these values should not appear as first part
+_name_blacklist = set('''
+ BACA BAKA BUEI BUEY CACA CACO CAGA CAGO CAKA CAKO COGE COGI COJA COJE
+ COJI COJO COLA CULO FALO FETO GETA GUEI GUEY JETA JOTO KACA KACO KAGA
+ KAGO KAKA KAKO KOGE KOGI KOJA KOJE KOJI KOJO KOLA KULO LILO LOCA LOCO
+ LOKA LOKO MAME MAMO MEAR MEAS MEON MIAR MION MOCO MOKO MULA MULO NACA
+ NACO PEDA PEDO PENE PIPI PITO POPO PUTA PUTO QULO RATA ROBA ROBE ROBO
+ RUIN SENO TETA VACA VAGA VAGO VAKA VUEI VUEY WUEI WUEY
+'''.split())
+
+# these are valid two-character states
+_valid_states = set('''
+ AS BC BS CC CH CL CM CS DF DG GR GT HG JC MC MN MS NE NL NT OC PL QR QT
+ SL SP SR TC TL TS VZ YN ZS
+'''.split())
+
+
+def compact(number):
+ """Convert the number to the minimal representation. This strips
+ surrounding whitespace and separation dash."""
+ return clean(number, '-_ ').upper().strip()
+
+
+def get_birth_date(number):
+ """Split the date parts from the number and return the birth date."""
+ number = compact(number)
+ year = int(number[4:6])
+ month = int(number[6:8])
+ day = int(number[8:10])
+ if number[16].isdigit():
+ year += 1900
+ else:
+ year += 2000
+ try:
+ return datetime.date(year, month, day)
+ except ValueError:
+ raise InvalidComponent()
+
+
+def get_gender(number):
+ """Get the gender (M/F) from the person's CURP."""
+ number = compact(number)
+ if number[10] == 'H':
+ return 'M'
+ elif number[10] == 'M':
+ return 'F'
+ else:
+ raise InvalidComponent()
+
+
+# characters used for checksum calculation
+_alphabet = '0123456789ABCDEFGHIJKLMN&OPQRSTUVWXYZ'
+
+
+def calc_check_digit(number):
+ """Calculate the check digit."""
+ check = sum(_alphabet.index(c) * (18 - i) for i, c in enumerate(number[:17]))
+ return str((10 - check % 10) % 10)
+
+
+def validate(number, validate_check_digits=False):
+ """Check if the number is a valid CURP."""
+ number = compact(number)
+ if len(number) != 18:
+ raise InvalidLength()
+ if not re.match(u'^[A-Z]{4}[0-9]{6}[A-Z]{6}[0-9A-Z][0-9]$', number):
+ raise InvalidFormat()
+ if number[:4] in _name_blacklist:
+ raise InvalidComponent()
+ get_birth_date(number)
+ get_gender(number)
+ if number[11:13] not in _valid_states:
+ raise InvalidComponent()
+ if number[-1] != calc_check_digit(number):
+ raise InvalidChecksum()
+ return number
+
+
+def is_valid(number, validate_check_digits=False):
+ """Check if the number provided is a valid CURP."""
+ try:
+ return bool(validate(number, validate_check_digits))
+ except ValidationError:
+ return False
diff --git a/tests/test_mx_curp.doctest b/tests/test_mx_curp.doctest
new file mode 100644
index 0000000..73eeb81
--- /dev/null
+++ b/tests/test_mx_curp.doctest
@@ -0,0 +1,118 @@
+test_mx_curp.doctest - more detailed doctests for the stdnum.mx.curp module
+
+Copyright (C) 2019 Arthur de Jong
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA
+
+
+This file contains more detailed doctests for the stdnum.mx.curp module.
+
+>>> from stdnum.mx import curp
+>>> from stdnum.exceptions import *
+
+
+Below are a few tests that were found from various sources. They are all
+expected to be hypothetical examples and are not likely to be real persons.
+
+
+Gloria Hernández García, female, born on 27 April 1956 in the state of
+Veracruz.
+
+>>> curp.validate('HEGG560427MVZRRL04')
+'HEGG560427MVZRRL04'
+>>> curp.get_gender('HEGG560427MVZRRL04')
+'F'
+>>> curp.get_birth_date('HEGG560427MVZRRL04')
+datetime.date(1956, 4, 27)
+
+
+Luis Raúl Bello Mena, male, born on March 13 1992 in the state of Mexico.
+
+>>> curp.validate('BEML920313HMCLNS09')
+'BEML920313HMCLNS09'
+>>> curp.get_gender('BEML920313HMCLNS09')
+'M'
+>>> curp.get_birth_date('BEML920313HMCLNS09')
+datetime.date(1992, 3, 13)
+
+
+Luis Perez Gomez, female, born on September 9 1989 in the state of Jalisco.
+
+>>> curp.validate('PEGL890909MJCRMS08')
+'PEGL890909MJCRMS08'
+>>> curp.get_gender('PEGL890909MJCRMS08')
+'F'
+>>> curp.get_birth_date('PEGL890909MJCRMS08')
+datetime.date(1989, 9, 9)
+
+
+This tests several corner cases in the validation.
+
+>>> curp.validate('PEGL890909MJCRMS08')
+'PEGL890909MJCRMS08'
+>>> curp.validate('1230')
+Traceback (most recent call last):
+ ...
+InvalidLength: ...
+>>> curp.validate('123ZZZZZZZZZZZZZ90')
+Traceback (most recent call last):
+ ...
+InvalidFormat: ...
+>>> curp.validate('BACA890909MJCRMS05') # bad word used
+Traceback (most recent call last):
+ ...
+InvalidComponent: ...
+>>> curp.validate('PEGL891313MJCRMS06') # invalid date
+Traceback (most recent call last):
+ ...
+InvalidComponent: ...
+>>> curp.validate('PEGL890909QJCRMS08') # invalid gender
+Traceback (most recent call last):
+ ...
+InvalidComponent: ...
+>>> curp.validate('PEGL890909MQQRMS02') # invalid state
+Traceback (most recent call last):
+ ...
+InvalidComponent: ...
+>>> curp.validate('PEGL890909MJCRMS09') # invalid check digit
+Traceback (most recent call last):
+ ...
+InvalidChecksum: ...
+
+
+These have been found online and should all be valid numbers. Note that these
+numbers all have valid check digits (also see the list below).
+
+>>> numbers = '''
+...
+... AAAA000101HDFCCC09
+... AAMG890608HDFLJL00
+... BAAA890317HDFRLL03
+... BAAD890419HMNRRV07
+... BEML920313HMCLNS09
+... HEGG560427MVZRRL04
+... HEGR891009HMNRRD09
+... MARR890512HMNRMN09
+... MESJ890928HMNZNS00
+... OOMG890727HMNRSR06
+... PEGL890909MJCRMS08
+... TOMA880125HMNRRN02
+... TOMA880125HMNRRNO2
+... VIAA900930MMNCLL08
+...
+... '''
+>>> [x for x in numbers.splitlines() if x and not curp.is_valid(x, validate_check_digits=True)]
+[]
-----------------------------------------------------------------------
Summary of changes:
stdnum/mx/__init__.py | 5 +-
stdnum/mx/curp.py | 135 +++++++++++++++++++++++++++++++++++++++++++++
tests/test_mx_curp.doctest | 118 +++++++++++++++++++++++++++++++++++++++
3 files changed, 256 insertions(+), 2 deletions(-)
create mode 100644 stdnum/mx/curp.py
create mode 100644 tests/test_mx_curp.doctest
hooks/post-receive
--
python-stdnum
--
To unsubscribe send an email to
python-stdnum-commits-unsubscribe@lists.arthurdejong.org or see
https://lists.arthurdejong.org/python-stdnum-commits/
- python-stdnum branch master updated. 1.10-11-g50874a9,
Commits of the python-stdnum project