lists.arthurdejong.org
RSS feed

python-stdnum branch master updated. 1.10-11-g50874a9

[Date Prev][Date Next] [Thread Prev][Thread Next]

python-stdnum branch master updated. 1.10-11-g50874a9



This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "python-stdnum".

The branch, master has been updated
       via  50874a93fae06c08be3c3ecd45c091f87ed8c80c (commit)
      from  4cb44aa747c9674222fefb4f696e6c7db33260c9 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
https://arthurdejong.org/git/python-stdnum/commit/?id=50874a93fae06c08be3c3ecd45c091f87ed8c80c

commit 50874a93fae06c08be3c3ecd45c091f87ed8c80c
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Tue Feb 5 22:49:00 2019 +0100

    Add Mexican CURP

diff --git a/stdnum/mx/__init__.py b/stdnum/mx/__init__.py
index 8cfc93d..51c4fe4 100644
--- a/stdnum/mx/__init__.py
+++ b/stdnum/mx/__init__.py
@@ -1,7 +1,7 @@
 # __init__.py - collection of Mexican numbers
 # coding: utf-8
 #
-# Copyright (C) 2015 Arthur de Jong
+# Copyright (C) 2015-2019 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -20,5 +20,6 @@
 
 """Collection of Mexican numbers."""
 
-# provide vat as an alias
+# provide aliases
+from stdnum.mx import curp as personalid  # noqa: F401
 from stdnum.mx import rfc as vat  # noqa: F401
diff --git a/stdnum/mx/curp.py b/stdnum/mx/curp.py
new file mode 100644
index 0000000..c1ddfd1
--- /dev/null
+++ b/stdnum/mx/curp.py
@@ -0,0 +1,135 @@
+# curp.py - functions for handling Mexican personal identifiers
+# coding: utf-8
+#
+# Copyright (C) 2019 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""CURP (Clave Única de Registro de Población, Mexican personal ID).
+
+The Clave Única de Registro de Población (Population Registry Code) is a
+unique identifier for both citizens and residents of Mexico. It is an
+18-character alphanumeric code that contains certain letters from the
+person's name, their gender and birth date, and a check digit.
+
+More information:
+
+* http://en.wikipedia.org/wiki/CURP
+* https://www.gob.mx/curp/
+
+>>> validate('BOXW310820HNERXN09')
+'BOXW310820HNERXN09'
+>>> validate('BOXW310820HNERXN08')
+Traceback (most recent call last):
+    ...
+InvalidChecksum: ...
+>>> get_birth_date('BOXW310820HNERXN09')
+datetime.date(1931, 8, 20)
+>>> get_gender('BOXW310820HNERXN09')
+'M'
+"""
+
+import datetime
+import re
+
+from stdnum.exceptions import *
+from stdnum.util import clean
+
+
+# these values should not appear as first part
+_name_blacklist = set('''
+    BACA BAKA BUEI BUEY CACA CACO CAGA CAGO CAKA CAKO COGE COGI COJA COJE
+    COJI COJO COLA CULO FALO FETO GETA GUEI GUEY JETA JOTO KACA KACO KAGA
+    KAGO KAKA KAKO KOGE KOGI KOJA KOJE KOJI KOJO KOLA KULO LILO LOCA LOCO
+    LOKA LOKO MAME MAMO MEAR MEAS MEON MIAR MION MOCO MOKO MULA MULO NACA
+    NACO PEDA PEDO PENE PIPI PITO POPO PUTA PUTO QULO RATA ROBA ROBE ROBO
+    RUIN SENO TETA VACA VAGA VAGO VAKA VUEI VUEY WUEI WUEY
+'''.split())
+
+# these are valid two-character states
+_valid_states = set('''
+    AS BC BS CC CH CL CM CS DF DG GR GT HG JC MC MN MS NE NL NT OC PL QR QT
+    SL SP SR TC TL TS VZ YN ZS
+'''.split())
+
+
+def compact(number):
+    """Convert the number to the minimal representation. This strips
+    surrounding whitespace and separation dash."""
+    return clean(number, '-_ ').upper().strip()
+
+
+def get_birth_date(number):
+    """Split the date parts from the number and return the birth date."""
+    number = compact(number)
+    year = int(number[4:6])
+    month = int(number[6:8])
+    day = int(number[8:10])
+    if number[16].isdigit():
+        year += 1900
+    else:
+        year += 2000
+    try:
+        return datetime.date(year, month, day)
+    except ValueError:
+        raise InvalidComponent()
+
+
+def get_gender(number):
+    """Get the gender (M/F) from the person's CURP."""
+    number = compact(number)
+    if number[10] == 'H':
+        return 'M'
+    elif number[10] == 'M':
+        return 'F'
+    else:
+        raise InvalidComponent()
+
+
+# characters used for checksum calculation,
+_alphabet = '0123456789ABCDEFGHIJKLMN&OPQRSTUVWXYZ'
+
+
+def calc_check_digit(number):
+    """Calculate the check digit."""
+    check = sum(_alphabet.index(c) * (18 - i) for i, c in enumerate(number[:17]))
+    return str((10 - check % 10) % 10)
+
+
+def validate(number, validate_check_digits=False):
+    """Check if the number is a valid CURP."""
+    number = compact(number)
+    if len(number) != 18:
+        raise InvalidLength()
+    if not re.match(u'^[A-Z]{4}[0-9]{6}[A-Z]{6}[0-9A-Z][0-9]$', number):
+        raise InvalidFormat()
+    if number[:4] in _name_blacklist:
+        raise InvalidComponent()
+    get_birth_date(number)
+    get_gender(number)
+    if number[11:13] not in _valid_states:
+        raise InvalidComponent()
+    if number[-1] != calc_check_digit(number):
+        raise InvalidChecksum()
+    return number
+
+
+def is_valid(number, validate_check_digits=False):
+    """Check if the number provided is a valid CURP."""
+    try:
+        return bool(validate(number, validate_check_digits))
+    except ValidationError:
+        return False
diff --git a/tests/test_mx_curp.doctest b/tests/test_mx_curp.doctest
new file mode 100644
index 0000000..73eeb81
--- /dev/null
+++ b/tests/test_mx_curp.doctest
@@ -0,0 +1,118 @@
+test_mx_curp.doctest - more detailed doctests for the stdnum.mx.curp module
+
+Copyright (C) 2019 Arthur de Jong
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA
+
+
+This file contains more detailed doctests for the stdnum.mx.curp module.
+
+>>> from stdnum.mx import curp
+>>> from stdnum.exceptions import *
+
+
+Below are a few test numbers that were found in various sources. They are all
+expected to be hypothetical examples and are not likely to be real persons.
+
+
+Gloria Hernández García, female, born on 27 April 1956 in the state of
+Veracruz.
+
+>>> curp.validate('HEGG560427MVZRRL04')
+'HEGG560427MVZRRL04'
+>>> curp.get_gender('HEGG560427MVZRRL04')
+'F'
+>>> curp.get_birth_date('HEGG560427MVZRRL04')
+datetime.date(1956, 4, 27)
+
+
+Luis Raúl Bello Mena, male, born on March 13 1992 in the state of Mexico.
+
+>>> curp.validate('BEML920313HMCLNS09')
+'BEML920313HMCLNS09'
+>>> curp.get_gender('BEML920313HMCLNS09')
+'M'
+>>> curp.get_birth_date('BEML920313HMCLNS09')
+datetime.date(1992, 3, 13)
+
+
+Luis Perez Gomez, female, born on September 9 1989 in the state of Jalisco.
+
+>>> curp.validate('PEGL890909MJCRMS08')
+'PEGL890909MJCRMS08'
+>>> curp.get_gender('PEGL890909MJCRMS08')
+'F'
+>>> curp.get_birth_date('PEGL890909MJCRMS08')
+datetime.date(1989, 9, 9)
+
+
+This tests several corner cases in the validation.
+
+>>> curp.validate('PEGL890909MJCRMS08')
+'PEGL890909MJCRMS08'
+>>> curp.validate('1230')
+Traceback (most recent call last):
+    ...
+InvalidLength: ...
+>>> curp.validate('123ZZZZZZZZZZZZZ90')
+Traceback (most recent call last):
+    ...
+InvalidFormat: ...
+>>> curp.validate('BACA890909MJCRMS05')  # bad word used
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> curp.validate('PEGL891313MJCRMS06')  # invalid date
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> curp.validate('PEGL890909QJCRMS08')  # invalid gender
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> curp.validate('PEGL890909MQQRMS02')  # invalid state
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> curp.validate('PEGL890909MJCRMS09')  # invalid check digit
+Traceback (most recent call last):
+    ...
+InvalidChecksum: ...
+
+
+These have been found online and should all be valid numbers. Note that these
+numbers all have valid check digits (also see the list below).
+
+>>> numbers = '''
+...
+... AAAA000101HDFCCC09
+... AAMG890608HDFLJL00
+... BAAA890317HDFRLL03
+... BAAD890419HMNRRV07
+... BEML920313HMCLNS09
+... HEGG560427MVZRRL04
+... HEGR891009HMNRRD09
+... MARR890512HMNRMN09
+... MESJ890928HMNZNS00
+... OOMG890727HMNRSR06
+... PEGL890909MJCRMS08
+... TOMA880125HMNRRN02
+... TOMA880125HMNRRNO2
+... VIAA900930MMNCLL08
+...
+... '''
+>>> [x for x in numbers.splitlines() if x and not curp.is_valid(x, validate_check_digits=True)]
+[]

-----------------------------------------------------------------------

Summary of changes:
 stdnum/mx/__init__.py      |   5 +-
 stdnum/mx/curp.py          | 135 +++++++++++++++++++++++++++++++++++++++++++++
 tests/test_mx_curp.doctest | 118 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 256 insertions(+), 2 deletions(-)
 create mode 100644 stdnum/mx/curp.py
 create mode 100644 tests/test_mx_curp.doctest


hooks/post-receive
-- 
python-stdnum
-- 
To unsubscribe send an email to
python-stdnum-commits-unsubscribe@lists.arthurdejong.org or see
https://lists.arthurdejong.org/python-stdnum-commits/