lists.arthurdejong.org
RSS feed

python-stdnum branch master updated. 1.20-42-g44575a1

[Date Prev][Date Next] [Thread Prev][Thread Next]

python-stdnum branch master updated. 1.20-42-g44575a1



This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "python-stdnum".

The branch, master has been updated
       via  44575a1826b5e3e8bec80b7b4999284ee5257d2b (commit)
       via  44c2355f7184863f5425f179f3a444dd95918669 (commit)
       via  01e87f987b77cc56451b74f4958e7fe3d8508cda (commit)
       via  37320ea1b38f3b3356a903af470d65b072cd7691 (commit)
       via  98b4fa0d9c3bdcf365c7825a6e754ec3c0a14992 (commit)
       via  6e8c783c9d0cef21f7349d3ce651ed1482605670 (commit)
      from  497bb1148ef2fd5d2371e99a33ede64d098640c4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
https://arthurdejong.org/git/python-stdnum/commit/?id=44575a1826b5e3e8bec80b7b4999284ee5257d2b

commit 44575a1826b5e3e8bec80b7b4999284ee5257d2b
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Mon May 5 18:02:45 2025 +0200

    Allow reading IBAN registry from the command line
    
    It seems that the Swift website currently uses TLS fingerprinting to
    block downloads of the IBAN registry except in certain browsers.
    
    It also fixes an idiosyncrasy in the IBAN registry itself where the
    Norwegian BBAN format string was not correct.

diff --git a/update/iban.py b/update/iban.py
index 3e954e6..d74984a 100755
--- a/update/iban.py
+++ b/update/iban.py
@@ -24,6 +24,8 @@ Financial Telecommunication which is the official IBAN registrar) to get
 the data needed to correctly parse and validate IBANs."""
 
 import csv
+import os.path
+import sys
 from collections import defaultdict
 
 import requests
@@ -31,6 +33,7 @@ import requests
 
 # The place where the current version of
 # swift_standards_infopaper_ibanregistry_1.txt can be downloaded.
+# Linked from https://www.swift.com/standards/data-standards/iban-international-bank-account-number
 download_url = 'https://www.swift.com/node/11971'
 
 
@@ -44,13 +47,20 @@ def get_country_codes(line):
 
 
 if __name__ == '__main__':
-    response = requests.get(download_url, timeout=30)
-    response.raise_for_status()
-    print('# generated from swift_standards_infopaper_ibanregistry_1.txt,')
-    print('# downloaded from %s' % download_url)
+    if len(sys.argv) > 1:
+        f = open(sys.argv[1], 'rt', encoding='iso-8859-15')
+        lines = iter(f)
+        print(f'# generated from {os.path.basename(sys.argv[1])}')
+        print(f'# downloaded from {download_url}')
+    else:
+        response = requests.get(download_url, timeout=30)
+        response.raise_for_status()
+        print('# generated from iban-registry_1.txt')
+        print(f'# downloaded from {download_url}')
+        lines = response.iter_lines(decode_unicode=True)
     values = defaultdict(dict)
     # the file is CSV but the data is in columns instead of rows
-    for row in csv.reader(response.iter_lines(decode_unicode=True), delimiter='\t', quotechar='"'):
+    for row in csv.reader(lines, delimiter='\t', quotechar='"'):
         # skip first row
         if row and row[0] != 'Data element':
             # first column contains label
@@ -66,6 +76,8 @@ if __name__ == '__main__':
         cname = data['Name of country']
         if bban.startswith(cc + '2!n'):
             bban = bban[5:]
+        if bban.startswith(cc):
+            bban = bban[2:]
         # print country line
         print('%s country="%s" bban="%s"' % (cc, cname, bban))
         # TODO: some countries have a fixed check digit value

https://arthurdejong.org/git/python-stdnum/commit/?id=44c2355f7184863f5425f179f3a444dd95918669

commit 44c2355f7184863f5425f179f3a444dd95918669
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Mon May 5 16:55:51 2025 +0200

    Fix the downloading of GS1 application identifiers
    
    The URL changed and the embedded JSON also slightly changed.

diff --git a/update/gs1_ai.py b/update/gs1_ai.py
index 0d3c3d3..5257988 100755
--- a/update/gs1_ai.py
+++ b/update/gs1_ai.py
@@ -30,7 +30,7 @@ import requests
 
 
 # the location of the GS1 application identifiers
-download_url = 'https://www.gs1.org/standards/barcodes/application-identifiers'
+download_url = 'https://ref.gs1.org/ai/'
 
 
 # The user agent that will be passed in requests
@@ -55,8 +55,8 @@ def fetch_ais():
             yield (
                 ai,
                 formatstring,
-                entry['fnc1required'],
-                entry['label'].strip(),
+                entry['separatorRequired'],
+                entry['title'].strip(),
                 entry['description'].strip())
 
 

https://arthurdejong.org/git/python-stdnum/commit/?id=01e87f987b77cc56451b74f4958e7fe3d8508cda

commit 01e87f987b77cc56451b74f4958e7fe3d8508cda
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Mon May 5 16:47:04 2025 +0200

    Fix datetime related deprecation warnings in update scripts

diff --git a/update/cn_loc.py b/update/cn_loc.py
index e8debf7..89b25ca 100755
--- a/update/cn_loc.py
+++ b/update/cn_loc.py
@@ -3,7 +3,7 @@
 # update/cn_loc.py - script to fetch data from the CN Open Data community
 #
 # Copyright (C) 2014-2015 Jiangge Zhang
-# Copyright (C) 2015-2022 Arthur de Jong
+# Copyright (C) 2015-2025 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -25,9 +25,9 @@ Github."""
 
 from __future__ import print_function, unicode_literals
 
+import datetime
 import sys
 from collections import OrderedDict
-from datetime import datetime
 
 import requests
 
@@ -87,7 +87,7 @@ if __name__ == '__main__':
     """Output a data file in the right format."""
     print("# generated from National Bureau of Statistics of the People's")
     print('# Republic of China, downloaded from %s' % data_url)
-    print('# %s' % datetime.utcnow())
+    print('# %s' % datetime.datetime.now(datetime.UTC))
     data_collection = fetch_data()
     for data in group_data(data_collection):
         print('%s county="%s" prefecture="%s" province="%s"' % data)
diff --git a/update/gs1_ai.py b/update/gs1_ai.py
index 29e2ab6..0d3c3d3 100755
--- a/update/gs1_ai.py
+++ b/update/gs1_ai.py
@@ -2,7 +2,7 @@
 
 # update/gs1_ai.py - script to get GS1 application identifiers
 #
-# Copyright (C) 2019-2024 Arthur de Jong
+# Copyright (C) 2019-2025 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -75,7 +75,7 @@ def group_ai_ranges():
 
 if __name__ == '__main__':
     print('# generated from %s' % download_url)
-    print('# on %s' % datetime.datetime.utcnow())
+    print('# on %s' % datetime.datetime.now(datetime.UTC))
     for ai1, ai2, format, require_fnc1, name, description in group_ai_ranges():
         _type = 'str'
         if re.match(r'^(N[68]\[?\+)?N[0-9]*[.]*[0-9]+\]?$', format) and 'date' in description.lower():

https://arthurdejong.org/git/python-stdnum/commit/?id=37320ea1b38f3b3356a903af470d65b072cd7691

commit 37320ea1b38f3b3356a903af470d65b072cd7691
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Mon May 5 16:39:51 2025 +0200

    Fix the downloading of Belgian bank identifiers
    
    The site switched from XLS to XLSX files.

diff --git a/update/be_banks.py b/update/be_banks.py
index 7cb6f31..eef7bf7 100755
--- a/update/be_banks.py
+++ b/update/be_banks.py
@@ -3,7 +3,7 @@
 
 # update/be_banks.py - script to download Bank list from Belgian National Bank
 #
-# Copyright (C) 2018-2019 Arthur de Jong
+# Copyright (C) 2018-2025 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -23,15 +23,16 @@
 """This script downloads the list of banks with bank codes as used in the
 IBAN and BIC codes as published by the Belgian National Bank."""
 
+import io
 import os.path
 
+import openpyxl
 import requests
-import xlrd
 
 
 # The location of the XLS version of the bank identification codes. Also see
 # https://www.nbb.be/en/payment-systems/payment-standards/bank-identification-codes
-download_url = 'https://www.nbb.be/doc/be/be/protocol/current_codes.xls'
+download_url = 'https://www.nbb.be/doc/be/be/protocol/current_codes.xlsx'
 
 
 # List of values that refer to non-existing, reserved or otherwise not-
@@ -60,7 +61,7 @@ def clean(value):
 
 def get_values(sheet):
     """Return values (from, to, bic, bank_name) from the worksheet."""
-    rows = sheet.get_rows()
+    rows = sheet.iter_rows()
     # skip first two rows
     try:
         next(rows)
@@ -79,9 +80,9 @@ def get_values(sheet):
 if __name__ == '__main__':
     response = requests.get(download_url, timeout=30)
     response.raise_for_status()
-    workbook = xlrd.open_workbook(file_contents=response.content)
-    sheet = workbook.sheet_by_index(0)
-    version = sheet.cell(0, 0).value
+    workbook = openpyxl.load_workbook(io.BytesIO(response.content), read_only=True)
+    sheet = workbook.worksheets[0]
+    version = sheet.cell(1, 1).value
     print('# generated from %s downloaded from' %
           os.path.basename(download_url))
     print('# %s' % download_url)
diff --git a/update/requirements.txt b/update/requirements.txt
index b51c684..7f2fd1e 100644
--- a/update/requirements.txt
+++ b/update/requirements.txt
@@ -1,4 +1,3 @@
 lxml
 openpyxl
 requests
-xlrd

https://arthurdejong.org/git/python-stdnum/commit/?id=98b4fa0d9c3bdcf365c7825a6e754ec3c0a14992

commit 98b4fa0d9c3bdcf365c7825a6e754ec3c0a14992
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Mon May 5 16:34:15 2025 +0200

    Always pass regex flags as keyword argument
    
    In particular with re.sub() it got confused with the count positional
    argument.
    
    Fixes a9039c1

diff --git a/stdnum/util.py b/stdnum/util.py
index abbd5d6..0848aa8 100644
--- a/stdnum/util.py
+++ b/stdnum/util.py
@@ -62,7 +62,7 @@ else:
 
 
 # Regular expression to match doctests in docstrings
-_strip_doctest_re = re.compile(r'^>>> .*\Z', re.DOTALL | re.MULTILINE)
+_strip_doctest_re = re.compile(r'^>>> .*\Z', flags=re.DOTALL | re.MULTILINE)
 
 
 # Regular expression to match digits
diff --git a/update/cfi.py b/update/cfi.py
index f5431a8..d1f3918 100755
--- a/update/cfi.py
+++ b/update/cfi.py
@@ -2,7 +2,7 @@
 
 # update/cfi.py - script to download CFI code list from the SIX group
 #
-# Copyright (C) 2022-2024 Arthur de Jong
+# Copyright (C) 2022-2025 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -35,7 +35,7 @@ download_url = 'https://www.six-group.com/en/products-services/financial-informa
 
 def normalise(value):
     """Clean and minimise attribute names and values."""
-    return re.sub(r' *[(\[\n].*', '', value, re.MULTILINE).strip()
+    return re.sub(r' *[(\[\n].*', '', value, flags=re.MULTILINE).strip()
 
 
 def get_categories(sheet):
diff --git a/update/isil.py b/update/isil.py
index c403c8b..2a051bf 100755
--- a/update/isil.py
+++ b/update/isil.py
@@ -2,7 +2,7 @@
 
 # update/isil.py - script to download ISIL agencies
 #
-# Copyright (C) 2011-2019 Arthur de Jong
+# Copyright (C) 2011-2025 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -29,7 +29,7 @@ import lxml.html
 import requests
 
 
-spaces_re = re.compile(r'\s+', re.UNICODE)
+spaces_re = re.compile(r'\s+', flags=re.UNICODE)
 
 # the web page that holds information on the ISIL authorities
 download_url = 'https://slks.dk/english/work-areas/libraries-and-literature/library-standards/isil'
diff --git a/update/my_bp.py b/update/my_bp.py
index d7ed69e..32d6edb 100755
--- a/update/my_bp.py
+++ b/update/my_bp.py
@@ -2,7 +2,7 @@
 
 # update/my_bp.py - script to download data from Malaysian government site
 #
-# Copyright (C) 2013-2022 Arthur de Jong
+# Copyright (C) 2013-2025 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -39,7 +39,7 @@ country_list_url = 'https://www.jpn.gov.my/en/kod-negara-eng'
 user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)'
 
 
-spaces_re = re.compile(r'\s+', re.UNICODE)
+spaces_re = re.compile(r'\s+', flags=re.UNICODE)
 
 
 def clean(td):

https://arthurdejong.org/git/python-stdnum/commit/?id=6e8c783c9d0cef21f7349d3ce651ed1482605670

commit 6e8c783c9d0cef21f7349d3ce651ed1482605670
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Mon May 5 16:08:34 2025 +0200

    Switch some URLs to HTTPS

diff --git a/stdnum/cr/cpj.py b/stdnum/cr/cpj.py
index 30845c1..dcc8548 100644
--- a/stdnum/cr/cpj.py
+++ b/stdnum/cr/cpj.py
@@ -32,7 +32,7 @@ More information:
 
 * https://www.hacienda.go.cr/consultapagos/ayuda_cedulas.htm
 * https://www.procomer.com/downloads/quiero/guia_solicitud_vuce.pdf (page 11)
-* http://www.registronacional.go.cr/personas_juridicas/documentos/Consultas/Listado%20de%20Clases%20y%20Tipos%20Cedulas%20Juridicas.pdf
+* https://rnpdigital.com/personas_juridicas/documentos/Consultas/Listado%20de%20Clases%20y%20Tipos%20Cedulas%20Juridicas.pdf
 * https://www.hacienda.go.cr/ATV/frmConsultaSituTributaria.aspx
 
 >>> validate('3-101-999999')
diff --git a/stdnum/fr/nir.py b/stdnum/fr/nir.py
index 1eab080..121da27 100644
--- a/stdnum/fr/nir.py
+++ b/stdnum/fr/nir.py
@@ -35,7 +35,7 @@ More information:
 
 * https://www.insee.fr/en/metadonnees/definition/c1409
 * https://en.wikipedia.org/wiki/INSEE_code
-* http://resoo.org/docs/_docs/regles-numero-insee.pdf
+* https://web.archive.org/web/20160910153938/http://resoo.org/docs/_docs/regles-numero-insee.pdf
 * https://fr.wikipedia.org/wiki/Numéro_de_sécurité_sociale_en_France
 * https://xml.insee.fr/schema/nir.html
 
diff --git a/stdnum/si/maticna.py b/stdnum/si/maticna.py
index 22f1aae..a7b5ac9 100644
--- a/stdnum/si/maticna.py
+++ b/stdnum/si/maticna.py
@@ -33,7 +33,7 @@ always represents the main registered address.
 
 More information:
 
-* http://www.pisrs.si/Pis.web/pregledPredpisa?id=URED7599
+* https://pisrs.si/pregledPredpisa?id=URED7599
 
 >>> validate('9331310000')
 '9331310'

-----------------------------------------------------------------------

Summary of changes:
 stdnum/cr/cpj.py        |  2 +-
 stdnum/fr/nir.py        |  2 +-
 stdnum/si/maticna.py    |  2 +-
 stdnum/util.py          |  2 +-
 update/be_banks.py      | 15 ++++++++-------
 update/cfi.py           |  4 ++--
 update/cn_loc.py        |  6 +++---
 update/gs1_ai.py        | 10 +++++-----
 update/iban.py          | 22 +++++++++++++++++-----
 update/isil.py          |  4 ++--
 update/my_bp.py         |  4 ++--
 update/requirements.txt |  1 -
 12 files changed, 43 insertions(+), 31 deletions(-)


hooks/post-receive
-- 
python-stdnum