# hetu.py - functions for handling Finnish personal identity codes
# coding: utf-8
#
# Copyright (C) 2011 Jussi Judin
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
#
# NOTE: this module was recovered from a "new file" patch against
# python-stdnum/stdnum/fi/hetu.py (revision 0 -> working copy). The same
# patch also listed python-stdnum/stdnum/fi/__init__.py as a new file, with
# no content shown.

"""Module for handling Finnish personal identity codes (HETU, Henkilötunnus).

See http://www.vaestorekisterikeskus.fi/default.aspx?id=45 for checksum
calculation details and http://tarkistusmerkit.teppovuori.fi/tarkmerk.htm#hetu1
for historical details.

>>> is_valid('131052-308T')
True
>>> is_valid('131052+308T')
True
>>> is_valid('131052A308T')
True
>>> is_valid('131052a308t')
True

Invalid checksum:
>>> is_valid('131052-308U')
False

Invalid century sign:
>>> is_valid('131052/308T')
False

Invalid century sign:
>>> is_valid('131052T308T')
False

Invalid birth date:
>>> is_valid('310252-308Y')
False
>>> is_valid('130052-308R')
False

Leaving first zero out:
>>> is_valid('10101-0101')
False

Invalid individual number:
>>> is_valid('131052-000V')
False

>>> compact('131052a308t')
'131052A308T'
>>> format('131052a308t')
'131052A308T'
"""

import re
import datetime


def _normalize(number):
    """Return the number with surrounding whitespace removed, in upper case."""
    return number.strip().upper()


# These are here just for completeness as there are no different length forms
# of Finnish personal identity codes:
compact = _normalize
format = _normalize

# Maps the century sign (7th character) to the first year of that century.
_centuries = {
    '+': 1800,
    '-': 1900,
    'A': 2000,
}

# Finnish personal identity codes are composed of date part, century indicating
# sign, individual number and control character.
# ddmmyyciiiC
# Explicit [0-9] classes are used instead of \d so that only ASCII digits are
# accepted regardless of the Python version or string type.
_hetu_re = re.compile(
    r"^(?P<day>[0-3][0-9])(?P<month>[01][0-9])(?P<year>[0-9]{2})"
    r"(?P<century>[-+A])(?P<individual>[0-9]{3})"
    r"(?P<control>[0-9ABCDEFHJKLMNPRSTUVWXY])$")

# The control character is chosen from this alphabet by the remainder of the
# nine-digit number (ddmmyyiii) divided by 31.
_control_characters = "0123456789ABCDEFHJKLMNPRSTUVWXY"


def _calc_checksum(number):
    """Return the control character for the integer formed by ddmmyyiii."""
    return _control_characters[number % 31]


def is_valid(number):
    """Check whether the passed string is a valid personal identity code.

    The number is stripped and upper-cased before checking. It is accepted
    only if it has the form ddmmyyciiiC, the century sign is known, the
    birth date exists in the calendar, the individual number is at least
    002 and the control character matches the calculated one.

    Returns True for a valid code and False otherwise.
    """
    normalized = _normalize(number)

    match = _hetu_re.match(normalized)
    if not match:
        return False

    # The pattern already restricts the century sign; this lookup keeps the
    # function safe should the pattern and the table ever drift apart.
    century_code = match.group("century")
    if century_code not in _centuries:
        return False
    date_year = _centuries[century_code] + int(match.group("year"))

    # Check if birth date is valid; datetime.date raises ValueError for
    # dates that do not exist (month 00, 31st of February, ...).
    try:
        datetime.date(
            date_year, int(match.group("month")), int(match.group("day")))
    except ValueError:
        return False

    # For historical reasons individual IDs start from 002.
    if int(match.group("individual")) < 2:
        return False

    # The checksum covers the date part and the individual number, without
    # the century sign.
    checkable_number = int(
        match.group("day") + match.group("month") +
        match.group("year") + match.group("individual"))
    return match.group("control") == _calc_checksum(checkable_number)