lists.arthurdejong.org
RSS feed

python-stdnum branch master updated. 1.16-23-g26a7e7b

[Date Prev][Date Next] [Thread Prev][Thread Next]

python-stdnum branch master updated. 1.16-23-g26a7e7b



This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "python-stdnum".

The branch, master has been updated
       via  26a7e7bc5cdb6b2d69cbabfbf7d3011697ec3eeb (commit)
       via  ca560cdc1261530c12a5f13d7a5180f46ebd2856 (commit)
       via  fc5638846244d280ec3adaedd250f3c4857f98f4 (commit)
      from  1a0e61332c5bd8c517f868f68050bbc416051abf (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
https://arthurdejong.org/git/python-stdnum/commit/?id=26a7e7bc5cdb6b2d69cbabfbf7d3011697ec3eeb

commit 26a7e7bc5cdb6b2d69cbabfbf7d3011697ec3eeb
Author: Gaurav Chauhan <71322586+vairag22@users.noreply.github.com>
Date:   Thu Sep 30 02:34:40 2021 +0530

    Add Indian GSTIN (VAT number)
    
    Closes https://github.com/arthurdejong/python-stdnum/pull/279

diff --git a/stdnum/in_/__init__.py b/stdnum/in_/__init__.py
index 5226bd0..ec870c8 100644
--- a/stdnum/in_/__init__.py
+++ b/stdnum/in_/__init__.py
@@ -19,3 +19,6 @@
 # 02110-1301 USA
 
 """Collection of Indian numbers."""
+
+# provide aliases
+from stdnum.in_ import gstin as vat  # noqa: F401
diff --git a/stdnum/in_/gstin.py b/stdnum/in_/gstin.py
new file mode 100644
index 0000000..84d8c07
--- /dev/null
+++ b/stdnum/in_/gstin.py
@@ -0,0 +1,161 @@
+# gstin.py - functions for handling Indian VAT numbers
+#
+# Copyright (C) 2021 Gaurav Chauhan
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""GSTIN (Goods and Services Tax identification number, Indian VAT number).
+
+The Goods and Services Tax identification number (GSTIN) is a 15 digit unique
+identifier assigned to all business entities in India registered under the
+Goods and Services Tax (GST) Act, 2017.
+
+Each GSTIN begins with a 2 digit state code, the next 10 characters are the
+holder's PAN, the 13th character is an alphanumeric digit that represents the
+number of GSTIN registrations made in a state or union territory for the same
+PAN, the 14th character is 'Z' and the last character is an alphanumeric
+check digit calculated using Luhn mod 36 algorithm.
+
+More information:
+
+* https://bajajfinserv.in/insights/what-is-goods-and-service-tax-identification-number
+* https://ddvat.gov.in/docs/List%20of%20State%20Code.pdf
+* https://en.wikipedia.org/wiki/Goods_and_Services_Tax_(India)
+
+>>> validate('27AAPFU0939F1ZV')
+'27AAPFU0939F1ZV'
+>>> validate('27AAPFU0939F1Z')
+Traceback (most recent call last):
+    ...
+InvalidLength: ...
+>>> validate('369296450896540')
+Traceback (most recent call last):
+    ...
+InvalidFormat: ...
+>>> validate('27AAPFU0939F1AA')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> validate('27AAPFU0939F1ZO')
+Traceback (most recent call last):
+    ...
+InvalidChecksum: ...
+>>> to_pan('27AAPFU0939F1ZV')
+'AAPFU0939F'
+>>> info('27AAPFU0939F1ZV')['state']
+'Maharashtra'
+"""
+
+import re
+
+from stdnum import luhn
+from stdnum.exceptions import *
+from stdnum.in_ import pan
+from stdnum.util import clean
+
+
+_GSTIN_RE = re.compile(r'^[0-9]{2}[A-Z]{5}[0-9]{4}[A-Z][0-9A-Z]{3}$')
+
+_ALPHABET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
+_STATE_CODES = {
+    '01': 'Jammu and Kashmir',
+    '02': 'Himachal Pradesh',
+    '03': 'Punjab',
+    '04': 'Chandigarh',
+    '05': 'Uttarakhand',
+    '06': 'Haryana',
+    '07': 'Delhi',
+    '08': 'Rajasthan',
+    '09': 'Uttar Pradesh',
+    '10': 'Bihar',
+    '11': 'Sikkim',
+    '12': 'Arunachal Pradesh',
+    '13': 'Nagaland',
+    '14': 'Manipur',
+    '15': 'Mizoram',
+    '16': 'Tripura',
+    '17': 'Meghalaya',
+    '18': 'Assam',
+    '19': 'West Bengal',
+    '20': 'Jharkhand',
+    '21': 'Orissa',
+    '22': 'Chattisgarh',
+    '23': 'Madhya Pradesh',
+    '24': 'Gujarat',
+    '25': 'Daman and Diu',
+    '26': 'Dadar and Nagar Haveli',
+    '27': 'Maharashtra',
+    '28': 'Andhra Pradesh',
+    '29': 'Karnataka',
+    '30': 'Goa',
+    '31': 'Lakshadweep',
+    '32': 'Kerala',
+    '33': 'Tamil Nadu',
+    '34': 'Puducherry',
+    '35': 'Anadaman and Nicobar Islands',
+    '36': 'Telangana',
+    '37': 'Andhra Pradesh (New)',
+}
+
+
+def compact(number):
+    """Convert the number to the minimal representation. This strips the
+    number of any valid separators and removes surrounding whitespace."""
+    return clean(number, ' -').upper().strip()
+
+
+def validate(number):
+    """Check if the number provided is a valid GSTIN. This checks the length,
+    formatting and check digit."""
+    number = compact(number)
+    if len(number) != 15:
+        raise InvalidLength()
+    if not _GSTIN_RE.match(number):
+        raise InvalidFormat()
+    if number[:2] not in _STATE_CODES or number[12] == '0' or number[13] != 'Z':
+        raise InvalidComponent()
+    pan.validate(number[2:12])
+    luhn.validate(number, _ALPHABET)
+    return number
+
+
+def is_valid(number):
+    """Check if the number provided is a valid GSTIN. This checks the length,
+    formatting and check digit."""
+    try:
+        return bool(validate(number))
+    except ValidationError:
+        return False
+
+
+def to_pan(number):
+    """Convert the number to a PAN."""
+    number = compact(number)
+    return number[2:12]
+
+
+def info(number):
+    """Provide information that can be decoded locally from GSTIN (without
+    API)."""
+    number = validate(number)
+    return {
+        'state': _STATE_CODES.get(number[:2]),
+        'pan': number[2:12],
+        'holder_type': pan.info(number[2:12])['holder_type'],
+        'initial': number[6],
+        'registration_count': _ALPHABET.index(number[12]),
+    }

https://arthurdejong.org/git/python-stdnum/commit/?id=ca560cdc1261530c12a5f13d7a5180f46ebd2856

commit ca560cdc1261530c12a5f13d7a5180f46ebd2856
Author: Gaurav Chauhan <71322586+vairag22@users.noreply.github.com>
Date:   Thu Sep 30 02:33:44 2021 +0530

    Add Indian EPIC number (Voter ID number)
    
    Closes https://github.com/arthurdejong/python-stdnum/pull/279

diff --git a/stdnum/in_/epic.py b/stdnum/in_/epic.py
new file mode 100644
index 0000000..14ec89d
--- /dev/null
+++ b/stdnum/in_/epic.py
@@ -0,0 +1,89 @@
+# epic.py - functions for handling Indian voter identification numbers
+#
+# Copyright (C) 2021 Gaurav Chauhan
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""EPIC (Electoral Photo Identity Card, Indian Voter ID).
+
+The Electoral Photo Identity Card (EPIC) is an identity document issued by
+the Election Commission of India (ECI) only to Indian citizens who have
+reached the age of 18.
+
+Each EPIC contains a unique 10 digit alphanumeric identifier known as EPIC
+number or Voter ID number.
+
+Every EPIC number begins with a Functional Unique Serial Number (FUSN), a 3
+letter unique identifier for each Assembly Constituency. FUSN is followed by
+a 6 digit serial number and 1 check digit of the serial number calculated
+using Luhn algorithm.
+
+More information:
+
+* https://en.wikipedia.org/wiki/Voter_ID_(India)
+* https://www.kotaksecurities.com/ksweb/voter-id/serial-number-in-elctoral-roll
+
+>>> validate('WKH1186253')
+'WKH1186253'
+>>> validate('WKH118624')
+Traceback (most recent call last):
+    ...
+InvalidLength: ...
+>>> validate('1231186253')
+Traceback (most recent call last):
+    ...
+InvalidFormat: ...
+>>> validate('WKH1186263')
+Traceback (most recent call last):
+    ...
+InvalidChecksum: ...
+"""
+
+import re
+
+from stdnum import luhn
+from stdnum.exceptions import *
+from stdnum.util import clean
+
+
+_EPIC_RE = re.compile(r'^[A-Z]{3}[0-9]{7}$')
+
+
+def compact(number):
+    """Convert the number to the minimal representation. This strips the
+    number of any valid separators and removes surrounding whitespace."""
+    return clean(number, ' -').upper().strip()
+
+
+def validate(number):
+    """Check if the number provided is a valid EPIC number. This checks the
+    length, formatting and checksum."""
+    number = compact(number)
+    if len(number) != 10:
+        raise InvalidLength
+    if not _EPIC_RE.match(number):
+        raise InvalidFormat()
+    luhn.validate(number[3:])
+    return number
+
+
+def is_valid(number):
+    """Check if the number provided is a valid EPIC number. This checks the
+    length, formatting and checksum."""
+    try:
+        return bool(validate(number))
+    except ValidationError:
+        return False

https://arthurdejong.org/git/python-stdnum/commit/?id=fc5638846244d280ec3adaedd250f3c4857f98f4

commit fc5638846244d280ec3adaedd250f3c4857f98f4
Author: Gaurav Chauhan <71322586+vairag22@users.noreply.github.com>
Date:   Thu Sep 30 02:31:18 2021 +0530

    Improve validation and docstrings of Indian numbers
    
    This ensures that an Aadhaar cannot be a palindrome and checks the
    serial part of the PAN to not be all zeros. It also updates some
    descriptions of PAN holder types and renames the card_holder_type to
    just holder_type.
    
    Closes https://github.com/arthurdejong/python-stdnum/pull/279

diff --git a/stdnum/in_/aadhaar.py b/stdnum/in_/aadhaar.py
index eb57b95..cf5d97f 100644
--- a/stdnum/in_/aadhaar.py
+++ b/stdnum/in_/aadhaar.py
@@ -1,6 +1,7 @@
-# aadhaar.py - functions for handling Indian Aadhaar numbers
+# aadhaar.py - functions for handling Indian personal identity numbers
 #
 # Copyright (C) 2017 Srikanth L
+# Copyright (C) 2021 Gaurav Chauhan
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -17,15 +18,21 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 # 02110-1301 USA
 
-"""Aadhaar (Indian digital resident personal identity number)
+"""Aadhaar (Indian personal identity number).
 
-Aadhaar is a 12 digit unique identity number issued to all Indian residents.
-The number is assigned by the Unique Identification Authority of India
-(UIDAI).
+Aadhaar is a 12 digit identification number that can be obtained by Indian
+citizens, non-resident passport holders of India and resident foreign
+nationals. The number is issued by the Unique Identification Authority of
+India (UIDAI).
+
+Aadhaar is made up of 12 digits where the last digit is a check digit
+calculated using the Verhoeff algorithm. The numbers are generated in a
+random, non-repeating sequence and do not begin with 0 or 1.
 
 More information:
 
 * https://en.wikipedia.org/wiki/Aadhaar
+* https://web.archive.org/web/20140611025606/http://uidai.gov.in/UID_PDF/Working_Papers/A_UID_Numbering_Scheme.pdf
 
 >>> validate('234123412346')
 '234123412346'
@@ -41,6 +48,10 @@ InvalidFormat: ...
 Traceback (most recent call last):
     ...
 InvalidLength: ...
+>>> validate('222222222222')  # number cannot be a palindrome
+Traceback (most recent call last):
+    ...
+InvalidFormat: ...
 >>> format('234123412346')
 '2341 2341 2346'
 >>> mask('234123412346')
@@ -72,6 +83,8 @@ def validate(number):
         raise InvalidLength()
     if not aadhaar_re.match(number):
         raise InvalidFormat()
+    if number == number[::-1]:
+        raise InvalidFormat()  # Aadhaar cannot be a palindrome
     verhoeff.validate(number)
     return number
 
@@ -92,7 +105,7 @@ def format(number):
 
 
 def mask(number):
-    """Masks the first 8 digits as per MeitY guidelines for securing identity
-    information and Sensitive personal data."""
+    """Masks the first 8 digits as per Ministry of Electronics and
+    Information Technology (MeitY) guidelines."""
     number = compact(number)
     return 'XXXX XXXX ' + number[-4:]
diff --git a/stdnum/in_/pan.py b/stdnum/in_/pan.py
index 483e337..f3aedbe 100644
--- a/stdnum/in_/pan.py
+++ b/stdnum/in_/pan.py
@@ -1,6 +1,7 @@
-# pan.py - functions for handling Indian Permanent Account number (PAN)
+# pan.py - functions for handling Indian income tax numbers
 #
 # Copyright (C) 2017 Srikanth Lakshmanan
+# Copyright (C) 2021 Gaurav Chauhan
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -20,19 +21,23 @@
 """PAN (Permanent Account Number, Indian income tax identifier).
 
 The Permanent Account Number (PAN) is a 10 digit alphanumeric identifier for
-Indian individuals, families and corporates for income tax purposes.
+Indian individuals, families and corporates for income tax purposes. It is
+also issued to foreign nationals subject to a valid visa.
 
-The number is built up of 5 characters, 4 numbers and 1 character. The fourth
-character indicates the type of holder of the number and the last character
-is computed by an undocumented checksum algorithm.
+PAN is made up of 5 letters, 4 digits and 1 alphabetic check digit. The 4th
+character indicates the type of holder, the 5th character is either 1st
+letter of the holder's name or holder's surname in case of 'Individual' PAN,
+next 4 digits are serial numbers running from 0001 to 9999 and the last
+character is a check digit computed by an undocumented checksum algorithm.
 
 More information:
 
 * https://en.wikipedia.org/wiki/Permanent_account_number
+* https://incometaxindia.gov.in/tutorials/1.permanent%20account%20number%20(pan).pdf
 
 >>> validate('ACUPA7085R')
 'ACUPA7085R'
->>> validate('234123412347')
+>>> validate('ACUPA7085RR')
 Traceback (most recent call last):
     ...
 InvalidLength: ...
@@ -44,9 +49,13 @@ InvalidFormat: ...
 Traceback (most recent call last):
     ...
 InvalidComponent: ...
+>>> validate('ACUPA0000R')  # serial number should not be '0000'
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
 >>> mask('AAPPV8261K')
 'AAPPVXXXXK'
->>> info('AAPPV8261K')['card_holder_type']
+>>> info('AAPPV8261K')['holder_type']
 'Individual'
 """
 
@@ -58,6 +67,21 @@ from stdnum.util import clean
 
 _pan_re = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
 
+_pan_holder_types = {
+    'A': 'Association of Persons (AOP)',
+    'B': 'Body of Individuals (BOI)',
+    'C': 'Company',
+    'F': 'Firm/Limited Liability Partnership',
+    'G': 'Government Agency',
+    'H': 'Hindu Undivided Family (HUF)',
+    'L': 'Local Authority',
+    'J': 'Artificial Juridical Person',
+    'P': 'Individual',
+    'T': 'Trust',
+    'K': 'Krish (Trust Krish)',
+}
+# Type 'K' may have been discontinued, not listed on Income Tax Dept website.
+
 
 def compact(number):
     """Convert the number to the minimal representation. This strips the
@@ -66,54 +90,43 @@ def compact(number):
 
 
 def validate(number):
-    """Check if the number provided is a valid PAN. This checks the
-    length and formatting."""
+    """Check if the number provided is a valid PAN. This checks the length
+    and formatting."""
     number = compact(number)
     if len(number) != 10:
         raise InvalidLength()
     if not _pan_re.match(number):
         raise InvalidFormat()
     info(number)  # used to check 4th digit
+    if number[5:9] == '0000':
+        raise InvalidComponent()
     return number
 
 
 def is_valid(number):
-    """Check if the number provided is a valid PAN. This checks the
-    length and formatting."""
+    """Check if the number provided is a valid PAN. This checks the length
+    and formatting."""
     try:
         return bool(validate(number))
     except ValidationError:
         return False
 
 
-_card_holder_types = {
-    'A': 'Association of Persons (AOP)',
-    'B': 'Body of Individuals (BOI)',
-    'C': 'Company',
-    'F': 'Firm',
-    'G': 'Government',
-    'H': 'HUF (Hindu Undivided Family)',
-    'L': 'Local Authority',
-    'J': 'Artificial Juridical Person',
-    'P': 'Individual',
-    'T': 'Trust (AOP)',
-    'K': 'Krish (Trust Krish)',
-}
-
-
 def info(number):
     """Provide information that can be decoded from the PAN."""
     number = compact(number)
-    card_holder_type = _card_holder_types.get(number[3])
-    if not card_holder_type:
+    holder_type = _pan_holder_types.get(number[3])
+    if not holder_type:
         raise InvalidComponent()
     return {
-        'card_holder_type': card_holder_type,
+        'holder_type': holder_type,
+        'card_holder_type': holder_type,  # for backwards compatibility
         'initial': number[4],
     }
 
 
 def mask(number):
-    """Mask the PAN as per CBDT masking standard."""
+    """Mask the PAN as per Central Board of Direct Taxes (CBDT) masking
+    standard."""
     number = compact(number)
     return number[:5] + 'XXXX' + number[-1:]

-----------------------------------------------------------------------

Summary of changes:
 stdnum/in_/__init__.py |   3 +
 stdnum/in_/aadhaar.py  |  27 ++++++---
 stdnum/in_/epic.py     |  89 +++++++++++++++++++++++++++
 stdnum/in_/gstin.py    | 161 +++++++++++++++++++++++++++++++++++++++++++++++++
 stdnum/in_/pan.py      |  73 +++++++++++++---------
 5 files changed, 316 insertions(+), 37 deletions(-)
 create mode 100644 stdnum/in_/epic.py
 create mode 100644 stdnum/in_/gstin.py


hooks/post-receive
-- 
python-stdnum