python-stdnum commit: r61 - in python-stdnum: . stdnum tests
[
Date Prev][
Date Next]
[
Thread Prev][
Thread Next]
python-stdnum commit: r61 - in python-stdnum: . stdnum tests
- From: Commits of the python-stdnum project <python-stdnum-commits [at] lists.arthurdejong.org>
- To: python-stdnum-commits [at] lists.arthurdejong.org
- Reply-to: python-stdnum-users [at] lists.arthurdejong.org
- Subject: python-stdnum commit: r61 - in python-stdnum: . stdnum tests
- Date: Sat, 5 Feb 2011 23:43:46 +0100 (CET)
Author: arthur
Date: Sat Feb 5 23:43:44 2011
New Revision: 61
URL: http://arthurdejong.org/viewvc/python-stdnum?view=rev&revision=61
Log:
add an ISIL (International Standard Identifier for Libraries and Related
Organizations) module
Added:
python-stdnum/getisil.py (contents, props changed)
python-stdnum/stdnum/isil.dat
python-stdnum/stdnum/isil.py
Modified:
python-stdnum/tests/test_robustness.doctest
Added: python-stdnum/getisil.py
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ python-stdnum/getisil.py Sat Feb 5 23:43:44 2011 (r61)
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+
+# getisil.py - script to download ISIL agencies
+#
+# Copyright (C) 2011 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""This script downloads a web page from the ISIL Registration Authority
+and screen-scrapes the national and non-national ISIL agencies and
+code prefixes."""
+
+import urllib
+import BeautifulSoup
+import re
+
+spaces_re = re.compile('\s+', re.UNICODE)
+
+# the web page that holds information on the ISIL authorities
+download_url = 'http://biblstandard.dk/isil/'
+
+def clean(s):
+ """Cleans up the string removing unneeded stuff from it."""
+ return spaces_re.sub(' ', s.replace(u'\u0096', '')).strip().encode('utf-8')
+
+def parse(f):
+ """Parse the specified file."""
+ print '# generated from ISIL Registration Authority, downloaded from'
+ print '# %s' % download_url
+ soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html')
+ # find all table rows
+ for tr in soup.findAll('tr'):
+ # find the rows with four columns of text
+ tds = tr.findAll('td', attrs={'class': 'text'}, recursive=False)
+ if len(tds) == 4:
+ props = {}
+ cc = clean(tds[0].string)
+ if tds[1].string:
+ props['country'] = clean(tds[1].contents[0])
+ ra_a = tds[2].find('a')
+ if ra_a:
+ props['ra'] = clean(ra_a.string)
+ props['ra_url'] = clean(ra_a['href'])
+ elif tds[2].string:
+ props['ra'] = clean(tds[2].string)
+ # we could also get the search urls from tds[3].findAll('a')
+ print '%s$ %s' % ( cc,
+ ' '.join([ '%s="%s"' % (x, y) for x, y in props.iteritems() ]) )
+
+if __name__ == '__main__':
+ #f = open('isil.html', 'r')
+ f = urllib.urlopen(download_url)
+ parse(f)
Added: python-stdnum/stdnum/isil.dat
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ python-stdnum/stdnum/isil.dat Sat Feb 5 23:43:44 2011 (r61)
@@ -0,0 +1,30 @@
+# generated from ISIL Registration Authority, downloaded from
+# http://biblstandard.dk/isil/
+AU$ country="Australia" ra_url="http://www.nla.gov.au/ilrs" ra="National Library of Australia"
+AR$ country="Argentine Republic" ra_url="http://www.iram.org.ar" ra="Argentine Standardization and Certification Institute (IRAM)"
+AT$ country="Austria" ra_url="http://www.obvsg.at" ra="Die Österreichische Bibliothekenverbund und Service GmbH"
+BE$ country="Belgium" ra_url="http://www.kbr.be" ra="Royal Library of Belgium"
+BY$ country="Belarus" ra_url="http://www.nlb.by/portal/page/portal/index?lang=en" ra="National Library of Belarus"
+CA$ country="Canada" ra_url="http://www.collectionscanada.ca/ill/s16-206-e.html#3.2.2" ra="Library and Archives Canada"
+CH$ country="Switzerland" ra_url="http://www.nb.admin.ch/slb/slb_professionnel/01540/index.html?lang=en" ra="Swiss National Library"
+CY$ country="Cyprus" ra_url="http://www.cut.ac.cy/library/english/isil_reg_agent.htm" ra="Cyprus University of Technology Library"
+DE$ country="Germany" ra_url="http://sigel.staatsbibliothek-berlin.de/quot; ra="Staatsbibliothek zu Berlin"
+DK$ country="Denmark" ra_url="http://www.bibliotekogmedier.dk/english/standards/danish-library-number" ra="Danish Agency for Libraries and Media"
+EG$ country="Egypt" ra_url="http://www.sti.sci.eg/focal_point.htm" ra="Egyptian National Scientific and Technical Information Network (ENSTINET)"
+FI$ country="Finland" ra_url="http://www.lib.helsinki.fi/english/libraries/standards/ISIL.htm" ra="The National Library of Finland"
+FR$ country="France" ra_url="http://www.abes.fr" ra="Agence Bibliographique de l'Enseignement Superieur"
+GB$ country="United Kingdom" ra_url="http://www.bl.uk/bibliographic/isilagency.html" ra="British Library"
+GL$ country="Greenland" ra_url="http://www.katak.gl/ISIL/Greenlandic_library_identifiers.html" ra="Central and Public Library of Greenland"
+IL$ country="Israel" ra_url="http://nli.org.il/eng" ra="The National Library of Israel"
+IR$ country="Islamic Republic of Iran" ra_url="http://www.nlai.ir/special_services/stds/isil.htm" ra="National Library of Iran"
+IT$ country="Italy" ra_url="http://www.iccu.sbn.it/genera.jsp?id=78&l=en" ra="Istituto Centrale per il Catalogo Unico delle biblioteche italiane e per le informazioni bibliografiche"
+KR$ country="Republic of Korea" ra_url="http://www.nl.go.kr/isil/" ra="The National Library of Korea"
+NL$ country="The Netherlands" ra_url="http://www.kb.nl/hpd/isil/index.html" ra="Koninklijke Bibliotheek"
+NO$ country="Norway" ra_url="http://www.nb.no/html/tildeling_av_nasjonalt_bibliot.html" ra="National Library of Norway"
+NZ$ country="New Zealand" ra_url="http://www.natlib.govt.nz/en/services/6docsupply.html#sect1" ra="National Library of New Zealand Te Puna Mātauranga o Aotearoa"
+SI$ country="The Republic of Slovenia" ra_url="http://www.nuk.uni-lj.si/nukeng3.asp?id=311364382" ra="National and University Library"
+US$ country="United States of America" ra="Library of Congress - under registration"
+M$ country="Library of Congress - outside US" ra="Library of Congress - under registration"
+O$ ra="See OCLC"
+OCLC$ country="WorldCat Symbol" ra_url="http://www.oclc.org" ra="OCLC"
+ZDB$ country="Staatsbibliothek zu Berlin - Zeitschriftendatenbank" ra="Staatsbibliothek zu Berlin"
Added: python-stdnum/stdnum/isil.py
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ python-stdnum/stdnum/isil.py Sat Feb 5 23:43:44 2011 (r61)
@@ -0,0 +1,68 @@
+# isil.py - functions for handling identifiers for libraries and related
+# organizations
+#
+# Copyright (C) 2011 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""Module for handling ISIL (International Standard Identifier for
+Libraries and Related Organizations).
+
+>>> is_valid('IT-RM0267')
+True
+>>> is_valid('OCLC-DLC')
+True
+>>> is_valid('WW-RM0267') # unregistered country code
+False
+>>> format('it-RM0267')
+'IT-RM0267'
+"""
+
+# the valid characters in an ISIL
+_alphabet = set('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-:/')
+
+def compact(number):
+ """Convert the ISIL to the minimal representation. This strips
+ surrounding whitespace."""
+ return number.strip()
+
+def _known_agency(agency):
+ """Checks whether the specified agency is valid."""
+ # look it up in the db
+ from stdnum import numdb
+ results = numdb.get('isil').info(agency.upper() + '$')
+ # there should be only one part and it should have properties
+ return len(results) == 1 and bool(results[0][1])
+
+def is_valid(number):
+ """Checks to see if the number provided is a valid isil (or isilSV)
+ number."""
+ try:
+ number = compact(number)
+ except:
+ return False
+ for n in number:
+ if n not in _alphabet:
+ return False
+ return len(number) <= 15 and _known_agency(number.split('-')[0])
+
+def format(number):
+ """Reformat the passed number to the standard format."""
+ number = compact(number)
+ parts = number.split('-')
+ if len(parts) > 1 and _known_agency(parts[0]):
+ parts[0] = parts[0].upper()
+ return '-'.join(parts)
Modified: python-stdnum/tests/test_robustness.doctest
==============================================================================
--- python-stdnum/tests/test_robustness.doctest Sat Feb 5 23:32:01 2011 (r60)
+++ python-stdnum/tests/test_robustness.doctest Sat Feb 5 23:43:44 2011 (r61)
@@ -24,7 +24,7 @@
>>> testvalues = ( None, '*&^%$', '', 0, False, object(), )
->>> from stdnum import grid, iban, imei, isan, isbn, ismn, issn
+>>> from stdnum import grid, iban, imei, isan, isbn, isil, ismn, issn
>>> from stdnum import luhn, meid, verhoeff
>>> from stdnum.iso7064 import mod_11_10, mod_11_2, mod_37_2, mod_37_36, mod_97_10
>>> from stdnum.nl import bsn
--
To unsubscribe send an email to
python-stdnum-commits-unsubscribe@lists.arthurdejong.org or see
http://lists.arthurdejong.org/python-stdnum-commits
- python-stdnum commit: r61 - in python-stdnum: . stdnum tests,
Commits of the python-stdnum project