python-stdnum branch master updated. 1.15-3-g407a02f
[Date Prev] [Date Next] [Thread Prev] [Thread Next]
python-stdnum branch master updated. 1.15-3-g407a02f
- From: Commits of the python-stdnum project <python-stdnum-commits [at] lists.arthurdejong.org>
- To: python-stdnum-commits [at] lists.arthurdejong.org
- Reply-to: python-stdnum-users [at] lists.arthurdejong.org, python-stdnum-commits [at] lists.arthurdejong.org
- Subject: python-stdnum branch master updated. 1.15-3-g407a02f
- Date: Sun, 24 Jan 2021 15:45:45 +0100 (CET)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "python-stdnum".
The branch, master has been updated
via 407a02f98c8c8dd911e040eb6f8a784a57f40af6 (commit)
from 53f13b4af087250e43f8841c01906e46d5687b44 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
https://arthurdejong.org/git/python-stdnum/commit/?id=407a02f98c8c8dd911e040eb6f8a784a57f40af6
commit 407a02f98c8c8dd911e040eb6f8a784a57f40af6
Author: Arthur de Jong <arthur@arthurdejong.org>
Date: Sun Jan 24 15:37:20 2021 +0100
Switch postal code download to Austrian open-data portal
This simplifies the process of downloading Austrian postal codes by
downloading a JSON blob instead of a spreadsheet from
https://www.data.gv.at/katalog/dataset/f76ed887-00d6-450f-a158-9f8b1cbbeebf
This filters the list to only use addressable (adressierbar) postal
codes because it matches the previous list.
Thanks Bernd Schlapsi for providing the pointer.
Closes https://github.com/arthurdejong/python-stdnum/issues/235
diff --git a/stdnum/at/postleitzahl.dat b/stdnum/at/postleitzahl.dat
index 0c5a466..1c6bd46 100644
--- a/stdnum/at/postleitzahl.dat
+++ b/stdnum/at/postleitzahl.dat
@@ -1,5 +1,5 @@
-# generated from PLZ_Verzeichnis-07012021.xls downloaded from
-# https://www.post.at/g/c/postlexikon
+# generated from https://data.rtr.at/api/v1/tables/plz.json
+# version 20232 published 2020-07-06T10:40:00+02:00
1010 location="Wien" region="Wien"
1020 location="Wien" region="Wien"
1030 location="Wien" region="Wien"
diff --git a/update/at_postleitzahl.py b/update/at_postleitzahl.py
index c596848..0ea7851 100755
--- a/update/at_postleitzahl.py
+++ b/update/at_postleitzahl.py
@@ -24,23 +24,12 @@
from __future__ import print_function, unicode_literals
-import os
-import os.path
-
-import lxml.html
import requests
-import xlrd
-
-try:
- from urllib.parse import urljoin
-except ImportError:
- from urlparse import urljoin
+# The URL of postal codes on the Austrian open-data portal in JSON format.
+download_url = 'https://data.rtr.at/api/v1/tables/plz.json'
-# The page that contains a link to the downloadable spreadsheet with current
-# Austrian postal codes
-base_url = 'https://www.post.at/g/c/postlexikon'
# The list of regions that can be used in the document.
regions = {
@@ -55,57 +44,19 @@ regions = {
'W': 'Wien',
}
-# The user agent that will be passed in requests
-user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)'
-
-
-# Custom headers that will be passed to requests
-headers = {
- 'User-Agent': user_agent,
-}
-
-
-def find_download_url():
- """Extract the spreadsheet URL from the Austrian Post website."""
- response = requests.get(base_url, headers=headers)
- response.raise_for_status()
- document = lxml.html.document_fromstring(response.content)
- url = [
- a.get('href')
- for a in document.findall('.//a[@href]')
- if 'Werben/PLZ_Verzeichnis' in a.get('href')][0]
- return urljoin(base_url, url.split('?')[0])
-
-
-def get_postal_codes(download_url):
- """Download the Austrian postal codes spreadsheet."""
- response = requests.get(download_url, headers=headers)
- response.raise_for_status()
- workbook = xlrd.open_workbook(
- file_contents=response.content, logfile=open(os.devnull, 'w'))
- sheet = workbook.sheet_by_index(0)
- rows = sheet.get_rows()
- # the first row contains the column names
- columns = [column.value.lower() for column in next(rows)]
- # the other rows contain data
- for row in rows:
- data = dict(zip(
- columns,
- [column.value for column in row]))
- if data['adressierbar'].lower() == 'ja':
- yield (
- data['plz'],
- data['ort'],
- regions.get(data['bundesland']))
-
if __name__ == '__main__':
- # download/parse the information
- download_url = find_download_url()
+ response = requests.get(download_url)
+ response.raise_for_status()
+ data = response.json()
# print header
- print('# generated from %s downloaded from' %
- os.path.basename(download_url))
- print('# %s' % base_url)
+ print('# generated from %s' % download_url)
+ print('# version %s published %s' % (
+ data['version']['id'], data['version']['published']))
# build an ordered list of postal codes
- for code, location, region in sorted(get_postal_codes(download_url)):
+ results = []
+ for row in data['data']:
+ if row['adressierbar'] == 'Ja':
+ results.append((str(row['plz']), row['ort'], regions[row['bundesland']]))
+ for code, location, region in sorted(results):
print('%s location="%s" region="%s"' % (code, location, region))
-----------------------------------------------------------------------
Summary of changes:
stdnum/at/postleitzahl.dat | 4 +--
update/at_postleitzahl.py | 75 ++++++++--------------------------------------
2 files changed, 15 insertions(+), 64 deletions(-)
hooks/post-receive
--
python-stdnum
- python-stdnum branch master updated. 1.15-3-g407a02f,
Commits of the python-stdnum project