lists.arthurdejong.org
RSS feed

python-stdnum branch master updated. 1.20-11-g3fcebb2

[Date Prev][Date Next] [Thread Prev][Thread Next]

python-stdnum branch master updated. 1.20-11-g3fcebb2



This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "python-stdnum".

The branch, master has been updated
       via  3fcebb2961b0b84b6008a0b27990ace757872e10 (commit)
      from  6cbb9bc09c25fbda7a032521bc57b44e0ce18ec4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
https://arthurdejong.org/git/python-stdnum/commit/?id=3fcebb2961b0b84b6008a0b27990ace757872e10

commit 3fcebb2961b0b84b6008a0b27990ace757872e10
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Sat Sep 14 15:39:55 2024 +0200

    Customise certificate validation for web services
    
    This adds a `verify` argument to all functions that use network services
    for lookups. The option is used to configure how certificate validation
    works, the same as in the requests library.
    
    For SOAP requests this is implemented properly when using the Zeep
    library. The implementations using Suds and PySimpleSOAP have been
    updated on a best-effort basis but their use has been deprecated because
    they do not seem to work in practice in a lot of cases already.
    
    Related to https://github.com/arthurdejong/python-stdnum/issues/452
    Related to https://github.com/arthurdejong/python-stdnum/pull/453

diff --git a/setup.py b/setup.py
index 6cbb31d..b8819ef 100755
--- a/setup.py
+++ b/setup.py
@@ -84,8 +84,6 @@ setup(
         # The SOAP feature is only required for a number of online tests
         # of numbers such as the EU VAT VIES lookup, the Dominican Republic
         # DGII services or the Turkish T.C. Kimlik validation.
-        'SOAP': ['zeep'],      # recommended implementation
-        'SOAP-ALT': ['suds'],  # but this should also work
-        'SOAP-FALLBACK': ['PySimpleSOAP'],  # this is a fallback
+        'SOAP': ['zeep'],
     },
 )
diff --git a/stdnum/by/unp.py b/stdnum/by/unp.py
index 5bd8b59..7279543 100644
--- a/stdnum/by/unp.py
+++ b/stdnum/by/unp.py
@@ -1,7 +1,7 @@
 # unp.py - functions for handling Belarusian UNP numbers
 # coding: utf-8
 #
-# Copyright (C) 2020 Arthur de Jong
+# Copyright (C) 2020-2024 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -104,9 +104,15 @@ def is_valid(number):
         return False
 
 
-def check_nalog(number, timeout=30):  # pragma: no cover (not part of normal test suite)
+def check_nalog(number, timeout=30, verify=True):  # pragma: no cover (not part of normal test suite)
     """Retrieve registration information from the portal.nalog.gov.by web site.
 
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
+
     This basically returns the JSON response from the web service as a dict.
     Will return ``None`` if the number is invalid or unknown.
     """
@@ -121,6 +127,7 @@ def check_nalog(number, timeout=30):  # pragma: no cover (not part of normal tes
             'unp': compact(number),
             'charset': 'UTF-8',
             'type': 'json'},
-        timeout=timeout)
+        timeout=timeout,
+        verify=verify)
     if response.ok and response.content:
         return response.json()['row']
diff --git a/stdnum/ch/uid.py b/stdnum/ch/uid.py
index 17fa776..97a0f5f 100644
--- a/stdnum/ch/uid.py
+++ b/stdnum/ch/uid.py
@@ -1,7 +1,7 @@
 # uid.py - functions for handling Swiss business identifiers
 # coding: utf-8
 #
-# Copyright (C) 2015-2022 Arthur de Jong
+# Copyright (C) 2015-2024 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -100,12 +100,18 @@ def format(number):
 uid_wsdl = 'https://www.uid-wse.admin.ch/V5.0/PublicServices.svc?wsdl'
 
 
-def check_uid(number, timeout=30):  # pragma: no cover
+def check_uid(number, timeout=30, verify=True):  # pragma: no cover
     """Look up information via the Swiss Federal Statistical Office web service.
 
     This uses the UID registry web service run by the the Swiss Federal
     Statistical Office to provide more details on the provided number.
 
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
+
     Returns a dict-like object for valid numbers with the following structure::
 
         {
@@ -145,7 +151,7 @@ def check_uid(number, timeout=30):  # pragma: no cover
     # this function isn't always tested because it would require network access
     # for the tests and might unnecessarily load the web service
     number = compact(number)
-    client = get_soap_client(uid_wsdl, timeout)
+    client = get_soap_client(uid_wsdl, timeout=timeout, verify=verify)
     try:
         return client.GetByUID(uid={'uidOrganisationIdCategorie': number[:3], 'uidOrganisationId': number[3:]})[0]
     except Exception:  # noqa: B902 (exception type depends on SOAP client)
diff --git a/stdnum/de/handelsregisternummer.py b/stdnum/de/handelsregisternummer.py
index 0cc2947..333e5f4 100644
--- a/stdnum/de/handelsregisternummer.py
+++ b/stdnum/de/handelsregisternummer.py
@@ -2,7 +2,7 @@
 # coding: utf-8
 #
 # Copyright (C) 2015 Holvi Payment Services Oy
-# Copyright (C) 2018-2022 Arthur de Jong
+# Copyright (C) 2018-2024 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -325,9 +325,15 @@ def is_valid(number):
 _offeneregister_url = 'https://db.offeneregister.de/openregister.json'
 
 
-def check_offeneregister(number, timeout=30):  # pragma: no cover (not part of normal test suite)
+def check_offeneregister(number, timeout=30, verify=True):  # pragma: no cover (not part of normal test suite)
     """Retrieve registration information from the OffeneRegister.de web site.
 
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
+
     This basically returns the JSON response from the web service as a dict.
     It will contain something like the following::
 
@@ -362,7 +368,8 @@ def check_offeneregister(number, timeout=30):  # pragma: no cover (not part of n
                    limit 1
                    ''',
             'p0': '%s %s %s' % (court, registry, number)},
-        timeout=timeout)
+        timeout=timeout,
+        verify=verify)
     response.raise_for_status()
     try:
         json = response.json()
diff --git a/stdnum/do/ncf.py b/stdnum/do/ncf.py
index 711c9e1..eea6b3d 100644
--- a/stdnum/do/ncf.py
+++ b/stdnum/do/ncf.py
@@ -1,7 +1,7 @@
 # ncf.py - functions for handling Dominican Republic invoice numbers
 # coding: utf-8
 #
-# Copyright (C) 2017-2018 Arthur de Jong
+# Copyright (C) 2017-2024 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -157,7 +157,7 @@ def _convert_result(result):  # pragma: no cover
         for key, value in result.items())
 
 
-def check_dgii(rnc, ncf, buyer_rnc=None, security_code=None, timeout=30):  # pragma: no cover
+def check_dgii(rnc, ncf, buyer_rnc=None, security_code=None, timeout=30, verify=True):  # pragma: no cover
     """Validate the RNC, NCF combination on using the DGII online web service.
 
     This uses the validation service run by the the Dirección General de
@@ -165,6 +165,12 @@ def check_dgii(rnc, ncf, buyer_rnc=None, security_code=None, timeout=30):  # pra
     whether the combination of RNC and NCF is valid. The timeout is in
     seconds.
 
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
+
     Returns a dict with the following structure for a NCF::
 
         {
@@ -201,6 +207,7 @@ def check_dgii(rnc, ncf, buyer_rnc=None, security_code=None, timeout=30):  # pra
         buyer_rnc = rnc_compact(buyer_rnc)
     url = 'https://dgii.gov.do/app/WebApps/ConsultasWeb2/ConsultasWeb/consultas/ncf.aspx'
     session = requests.Session()
+    session.verify = verify
     session.headers.update({
         'User-Agent': 'Mozilla/5.0 (python-stdnum)',
     })
diff --git a/stdnum/do/rnc.py b/stdnum/do/rnc.py
index ddc6c3b..fdb4f04 100644
--- a/stdnum/do/rnc.py
+++ b/stdnum/do/rnc.py
@@ -1,7 +1,7 @@
 # rnc.py - functions for handling Dominican Republic tax registration
 # coding: utf-8
 #
-# Copyright (C) 2015-2018 Arthur de Jong
+# Copyright (C) 2015-2024 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -115,12 +115,18 @@ def _convert_result(result):  # pragma: no cover
         for key, value in json.loads(result.replace('\n', '\\n').replace('\t', '\\t')).items())
 
 
-def check_dgii(number, timeout=30):  # pragma: no cover
+def check_dgii(number, timeout=30, verify=True):  # pragma: no cover
     """Lookup the number using the DGII online web service.
 
     This uses the validation service run by the the Dirección General de
     Impuestos Internos, the Dominican Republic tax department to lookup
-    registration information for the number. The timeout is in seconds.
+    registration information for the number.
+
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
 
     Returns a dict with the following structure::
 
@@ -137,7 +143,7 @@ def check_dgii(number, timeout=30):  # pragma: no cover
     # this function isn't automatically tested because it would require
     # network access for the tests and unnecessarily load the online service
     number = compact(number)
-    client = get_soap_client(dgii_wsdl, timeout)
+    client = get_soap_client(dgii_wsdl, timeout=timeout, verify=verify)
     result = client.GetContribuyentes(
         value=number,
         patronBusqueda=0,   # search type: 0=by number, 1=by name
@@ -152,7 +158,7 @@ def check_dgii(number, timeout=30):  # pragma: no cover
     return _convert_result(result[0])
 
 
-def search_dgii(keyword, end_at=10, start_at=1, timeout=30):  # pragma: no cover
+def search_dgii(keyword, end_at=10, start_at=1, timeout=30, verify=True):  # pragma: no cover
     """Search the DGII online web service using the keyword.
 
     This uses the validation service run by the the Dirección General de
@@ -160,7 +166,13 @@ def search_dgii(keyword, end_at=10, start_at=1, timeout=30):  # pragma: no cover
     registration information using the keyword.
 
     The number of entries returned can be tuned with the `end_at` and
-    `start_at` arguments. The timeout is in seconds.
+    `start_at` arguments.
+
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
 
     Returns a list of dicts with the following structure::
 
@@ -180,7 +192,7 @@ def search_dgii(keyword, end_at=10, start_at=1, timeout=30):  # pragma: no cover
     Will return an empty list if the number is invalid or unknown."""
     # this function isn't automatically tested because it would require
     # network access for the tests and unnecessarily load the online service
-    client = get_soap_client(dgii_wsdl, timeout)
+    client = get_soap_client(dgii_wsdl, timeout=timeout, verify=verify)
     results = client.GetContribuyentes(
         value=keyword,
         patronBusqueda=1,       # search type: 0=by number, 1=by name
diff --git a/stdnum/eu/vat.py b/stdnum/eu/vat.py
index 33d18b8..cb5d417 100644
--- a/stdnum/eu/vat.py
+++ b/stdnum/eu/vat.py
@@ -1,7 +1,7 @@
 # vat.py - functions for handling European VAT numbers
 # coding: utf-8
 #
-# Copyright (C) 2012-2021 Arthur de Jong
+# Copyright (C) 2012-2024 Arthur de Jong
 # Copyright (C) 2015 Lionel Elie Mamane
 #
 # This library is free software; you can redistribute it and/or
@@ -123,30 +123,50 @@ def guess_country(number):
             if _get_cc_module(cc).is_valid(number)]
 
 
-def check_vies(number, timeout=30):  # pragma: no cover (not part of normal test suite)
-    """Query the online European Commission VAT Information Exchange System
+def check_vies(number, timeout=30, verify=True):  # pragma: no cover (not part of normal test suite)
+    """Use the EU VIES service to validate the provided number.
+
+    Query the online European Commission VAT Information Exchange System
     (VIES) for validity of the provided number. Note that the service has
-    usage limitations (see the VIES website for details). The timeout is in
-    seconds. This returns a dict-like object."""
+    usage limitations (see the VIES website for details).
+
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
+
+    Returns a dict-like object.
+    """
     # this function isn't automatically tested because it would require
     # network access for the tests and unnecessarily load the VIES website
     number = compact(number)
-    client = get_soap_client(vies_wsdl, timeout)
+    client = get_soap_client(vies_wsdl, timeout=timeout, verify=verify)
     return client.checkVat(number[:2], number[2:])
 
 
-def check_vies_approx(number, requester, timeout=30):  # pragma: no cover
-    """Query the online European Commission VAT Information Exchange System
+def check_vies_approx(number, requester, timeout=30, verify=True):  # pragma: no cover
+    """Use the EU VIES service to validate the provided number.
+
+    Query the online European Commission VAT Information Exchange System
     (VIES) for validity of the provided number, providing a validity
     certificate as proof. You will need to give your own VAT number for this
     to work. Note that the service has usage limitations (see the VIES
-    website for details). The timeout is in seconds. This returns a dict-like
-    object."""
+    website for details).
+
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
+
+    Returns a dict-like object.
+    """
     # this function isn't automatically tested because it would require
     # network access for the tests and unnecessarily load the VIES website
     number = compact(number)
     requester = compact(requester)
-    client = get_soap_client(vies_wsdl, timeout)
+    client = get_soap_client(vies_wsdl, timeout=timeout, verify=verify)
     return client.checkVatApprox(
         countryCode=number[:2], vatNumber=number[2:],
         requesterCountryCode=requester[:2], requesterVatNumber=requester[2:])
diff --git a/stdnum/tr/tckimlik.py b/stdnum/tr/tckimlik.py
index e3dfcea..9a5df41 100644
--- a/stdnum/tr/tckimlik.py
+++ b/stdnum/tr/tckimlik.py
@@ -1,7 +1,7 @@
 # tckimlik.py - functions for handling T.C. Kimlik No.
 # coding: utf-8
 #
-# Copyright (C) 2016-2018 Arthur de Jong
+# Copyright (C) 2016-2024 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -89,15 +89,24 @@ def is_valid(number):
         return False
 
 
-def check_kps(number, name, surname, birth_year, timeout):  # pragma: no cover
-    """Query the online T.C. Kimlik validation service run by the Directorate
+def check_kps(number, name, surname, birth_year, timeout=30, verify=True):  # pragma: no cover
+    """Use the T.C. Kimlik validation service to check the provided number.
+
+    Query the online T.C. Kimlik validation service run by the Directorate
     of Population and Citizenship Affairs. The timeout is in seconds. This
     returns a boolean but may raise a SOAP exception for missing or invalid
-    values."""
+    values.
+
+    The `timeout` argument specifies the network timeout in seconds.
+
+    The `verify` argument is either a boolean that determines whether the
+    server's certificate is validated or a string which must be a path to the
+    CA certificate bundle to use for verification.
+    """
     # this function isn't automatically tested because it would require
     # network access for the tests and unnecessarily load the online service
     number = compact(number)
-    client = get_soap_client(tckimlik_wsdl, timeout)
+    client = get_soap_client(tckimlik_wsdl, timeout=timeout, verify=verify)
     result = client.TCKimlikNoDogrula(
         TCKimlikNo=number, Ad=name, Soyad=surname, DogumYili=birth_year)
     if hasattr(result, 'get'):
diff --git a/stdnum/util.py b/stdnum/util.py
index 4582242..b625c72 100644
--- a/stdnum/util.py
+++ b/stdnum/util.py
@@ -1,7 +1,7 @@
 # util.py - common utility functions
 # coding: utf-8
 #
-# Copyright (C) 2012-2021 Arthur de Jong
+# Copyright (C) 2012-2024 Arthur de Jong
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -28,6 +28,7 @@ stdnum.
 import pkgutil
 import pydoc
 import re
+import ssl
 import sys
 import unicodedata
 import warnings
@@ -244,42 +245,81 @@ def get_cc_module(cc, name):
 _soap_clients = {}
 
 
-def get_soap_client(wsdlurl, timeout=30):  # pragma: no cover (not part of normal test suite)
+def _get_zeep_soap_client(wsdlurl, timeout, verify):  # pragma: no cover (not part of normal test suite)
+    from requests import Session
+    from zeep import CachingClient
+    from zeep.transports import Transport
+    session = Session()
+    session.verify = verify
+    transport = Transport(operation_timeout=timeout, timeout=timeout, session=session)
+    return CachingClient(wsdlurl, transport=transport).service
+
+
+def _get_suds_soap_client(wsdlurl, timeout, verify):  # pragma: no cover (not part of normal test suite)
+    # other implementations require passing the proxy config
+    try:
+        from urllib.request import getproxies
+    except ImportError:  # Python 2 specific
+        from urllib import getproxies
+    try:
+        from urllib.request import HTTPSHandler
+    except ImportError:  # Python 2 specific
+        from urllib2 import HTTPSHandler
+    from suds.client import Client
+    from suds.transport.http import HttpTransport
+
+    class CustomSudsTransport(HttpTransport):
+
+        def u2handlers(self):
+            handlers = super(CustomSudsTransport, self).u2handlers()
+            if isinstance(verify, str):
+                if not os.path.isdir(verify):
+                    ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH, capath=verify)
+                else:
+                    ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH, cafile=verify)
+            else:
+                ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+                if verify is False:
+                    ssl_context.check_hostname = False
+                    ssl_context.verify_mode = ssl.CERT_NONE
+            handlers.append(HTTPSHandler(context=ssl_context))
+            return handlers
+    warnings.warn(
+        'Use of Suds for SOAP requests is deprecated, please use Zeep instead',
+        DeprecationWarning, stacklevel=1)
+    return Client(wsdlurl, proxy=getproxies(), timeout=timeout, transport=CustomSudsTransport()).service
+
+
+def _get_pysimplesoap_soap_client(wsdlurl, timeout, verify):  # pragma: no cover (not part of normal test suite)
+    from pysimplesoap.client import SoapClient
+    if verify is False:
+        raise ValueError('PySimpleSOAP does not support verify=False')
+    kwargs = {}
+    if isinstance(verify, str):
+        kwargs['cacert'] = verify
+    warnings.warn(
+        'Use of PySimpleSOAP for SOAP requests is deprecated, please use Zeep instead',
+        DeprecationWarning, stacklevel=1)
+    return SoapClient(wsdl=wsdlurl, proxy=getproxies(), timeout=timeout, **kwargs)
+
+
+def get_soap_client(wsdlurl, timeout=30, verify=True):  # pragma: no cover (not part of normal test suite)
     """Get a SOAP client for performing requests. The client is cached. The
-    timeout is in seconds."""
+    timeout is in seconds. The verify parameter is either True (the default), False
+    (to disable certificate validation) or a string value pointing to a CA certificate
+    file.
+    """
     # this function isn't automatically tested because the functions using
-    # it are not automatically tested
+    # it are not automatically tested and it requires network access for proper
+    # testing
     if (wsdlurl, timeout) not in _soap_clients:
-        # try zeep first
-        try:
-            from zeep.transports import Transport
-            transport = Transport(operation_timeout=timeout, timeout=timeout)
-            from zeep import CachingClient
-            client = CachingClient(wsdlurl, transport=transport).service
-        except ImportError:
-            # fall back to non-caching zeep client
+        for function in (_get_zeep_soap_client, _get_suds_soap_client, _get_pysimplesoap_soap_client):
             try:
-                from zeep import Client
-                client = Client(wsdlurl, transport=transport).service
+                client = function(wsdlurl, timeout, verify)
+                break
             except ImportError:
-                # other implementations require passing the proxy config
-                try:
-                    from urllib import getproxies
-                except ImportError:
-                    from urllib.request import getproxies
-                # fall back to suds
-                try:
-                    from suds.client import Client
-                    client = Client(
-                        wsdlurl, proxy=getproxies(), timeout=timeout).service
-                except ImportError:
-                    # use pysimplesoap as last resort
-                    try:
-                        from pysimplesoap.client import SoapClient
-                        client = SoapClient(
-                            wsdl=wsdlurl, proxy=getproxies(), timeout=timeout)
-                    except ImportError:
-                        raise ImportError(
-                            'No SOAP library (such as zeep) found')
+                pass
+        else:
+            raise ImportError('No SOAP library (such as zeep) found')
         _soap_clients[(wsdlurl, timeout)] = client
     return _soap_clients[(wsdlurl, timeout)]

-----------------------------------------------------------------------

Summary of changes:
 setup.py                           |   4 +-
 stdnum/by/unp.py                   |  13 +++--
 stdnum/ch/uid.py                   |  12 +++--
 stdnum/de/handelsregisternummer.py |  13 +++--
 stdnum/do/ncf.py                   |  11 +++-
 stdnum/do/rnc.py                   |  26 ++++++---
 stdnum/eu/vat.py                   |  42 +++++++++++----
 stdnum/tr/tckimlik.py              |  19 +++++--
 stdnum/util.py                     | 106 +++++++++++++++++++++++++------------
 9 files changed, 176 insertions(+), 70 deletions(-)


hooks/post-receive
-- 
python-stdnum