lists.arthurdejong.org
RSS feed

python-stdnum commit: r61 - in python-stdnum: . stdnum tests

[Date Prev][Date Next] [Thread Prev][Thread Next]

python-stdnum commit: r61 - in python-stdnum: . stdnum tests



Author: arthur
Date: Sat Feb  5 23:43:44 2011
New Revision: 61
URL: http://arthurdejong.org/viewvc/python-stdnum?view=rev&revision=61

Log:
add an ISIL (International Standard Identifier for Libraries and Related 
Organizations) module

Added:
   python-stdnum/getisil.py   (contents, props changed)
   python-stdnum/stdnum/isil.dat
   python-stdnum/stdnum/isil.py
Modified:
   python-stdnum/tests/test_robustness.doctest

Added: python-stdnum/getisil.py
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ python-stdnum/getisil.py    Sat Feb  5 23:43:44 2011        (r61)
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+
+# getisil.py - script to download ISIL agencies
+#
+# Copyright (C) 2011 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""This script downloads a web page from the ISIL Registration Authority
+and screen-scrapes the national and non-national ISIL agencies and
+code prefixes."""
+
+import urllib
+import BeautifulSoup
+import re
+
+spaces_re = re.compile('\s+', re.UNICODE)
+
+# the web page that holds information on the ISIL authorities
+download_url = 'http://biblstandard.dk/isil/'
+
+def clean(s):
+    """Cleans up the string removing unneeded stuff from it."""
+    return spaces_re.sub(' ', s.replace(u'\u0096', '')).strip().encode('utf-8')
+
+def parse(f):
+    """Parse the specified file."""
+    print '# generated from ISIL Registration Authority, downloaded from'
+    print '# %s' % download_url
+    soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html')
+    # find all table rows
+    for tr in soup.findAll('tr'):
+        # find the rows with four columns of text
+        tds = tr.findAll('td', attrs={'class': 'text'}, recursive=False)
+        if len(tds) == 4:
+            props = {}
+            cc = clean(tds[0].string)
+            if tds[1].string:
+                props['country'] = clean(tds[1].contents[0])
+            ra_a = tds[2].find('a')
+            if ra_a:
+                props['ra'] = clean(ra_a.string)
+                props['ra_url'] = clean(ra_a['href'])
+            elif tds[2].string:
+                props['ra'] = clean(tds[2].string)
+            # we could also get the search urls from tds[3].findAll('a')
+            print '%s$ %s' % ( cc,
+                  ' '.join([ '%s="%s"' % (x, y) for x, y in props.iteritems() ]) )
+
+if __name__ == '__main__':
+    #f = open('isil.html', 'r')
+    f = urllib.urlopen(download_url)
+    parse(f)

Added: python-stdnum/stdnum/isil.dat
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ python-stdnum/stdnum/isil.dat       Sat Feb  5 23:43:44 2011        (r61)
@@ -0,0 +1,30 @@
+# generated from ISIL Registration Authority, downloaded from
+# http://biblstandard.dk/isil/
+AU$ country="Australia" ra_url="http://www.nla.gov.au/ilrs" ra="National 
Library of Australia"
+AR$ country="Argentine Republic" ra_url="http://www.iram.org.ar" ra="Argentine 
Standardization and Certification Institute (IRAM)"
+AT$ country="Austria" ra_url="http://www.obvsg.at" ra="Die Österreichische 
Bibliothekenverbund und Service GmbH"
+BE$ country="Belgium" ra_url="http://www.kbr.be" ra="Royal Library of Belgium"
+BY$ country="Belarus" 
ra_url="http://www.nlb.by/portal/page/portal/index?lang=en" ra="National 
Library of Belarus"
+CA$ country="Canada" 
ra_url="http://www.collectionscanada.ca/ill/s16-206-e.html#3.2.2" ra="Library 
and Archives Canada"
+CH$ country="Switzerland" 
ra_url="http://www.nb.admin.ch/slb/slb_professionnel/01540/index.html?lang=en" 
ra="Swiss National Library"
+CY$ country="Cyprus" 
ra_url="http://www.cut.ac.cy/library/english/isil_reg_agent.htm" ra="Cyprus 
University of Technology Library"
+DE$ country="Germany" ra_url="http://sigel.staatsbibliothek-berlin.de/" ra="Staatsbibliothek zu Berlin"
+DK$ country="Denmark" 
ra_url="http://www.bibliotekogmedier.dk/english/standards/danish-library-number"
 ra="Danish Agency for Libraries and Media"
+EG$ country="Egypt" ra_url="http://www.sti.sci.eg/focal_point.htm" 
ra="Egyptian National Scientific and Technical Information Network (ENSTINET)"
+FI$ country="Finland" 
ra_url="http://www.lib.helsinki.fi/english/libraries/standards/ISIL.htm" 
ra="The National Library of Finland"
+FR$ country="France" ra_url="http://www.abes.fr" ra="Agence Bibliographique de 
l'Enseignement Superieur"
+GB$ country="United Kingdom" 
ra_url="http://www.bl.uk/bibliographic/isilagency.html" ra="British Library"
+GL$ country="Greenland" 
ra_url="http://www.katak.gl/ISIL/Greenlandic_library_identifiers.html" 
ra="Central and Public Library of Greenland"
+IL$ country="Israel" ra_url="http://nli.org.il/eng" ra="The National Library 
of Israel"
+IR$ country="Islamic Republic of Iran" 
ra_url="http://www.nlai.ir/special_services/stds/isil.htm" ra="National Library 
of Iran"
+IT$ country="Italy" ra_url="http://www.iccu.sbn.it/genera.jsp?id=78&l=en" 
ra="Istituto Centrale per il Catalogo Unico delle biblioteche italiane e per le 
informazioni bibliografiche"
+KR$ country="Republic of Korea" ra_url="http://www.nl.go.kr/isil/" ra="The 
National Library of Korea"
+NL$ country="The Netherlands" ra_url="http://www.kb.nl/hpd/isil/index.html" 
ra="Koninklijke Bibliotheek"
+NO$ country="Norway" 
ra_url="http://www.nb.no/html/tildeling_av_nasjonalt_bibliot.html" ra="National 
Library of Norway"
+NZ$ country="New Zealand" 
ra_url="http://www.natlib.govt.nz/en/services/6docsupply.html#sect1" 
ra="National Library of New Zealand Te Puna Mātauranga o Aotearoa"
+SI$ country="The Republic of Slovenia" 
ra_url="http://www.nuk.uni-lj.si/nukeng3.asp?id=311364382" ra="National and 
University Library"
+US$ country="United States of America" ra="Library of Congress - under 
registration"
+M$ country="Library of Congress - outside US" ra="Library of Congress - under 
registration"
+O$ ra="See OCLC"
+OCLC$ country="WorldCat Symbol" ra_url="http://www.oclc.org" ra="OCLC"
+ZDB$ country="Staatsbibliothek zu Berlin - Zeitschriftendatenbank" 
ra="Staatsbibliothek zu Berlin"

Added: python-stdnum/stdnum/isil.py
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ python-stdnum/stdnum/isil.py        Sat Feb  5 23:43:44 2011        (r61)
@@ -0,0 +1,68 @@
+# isil.py - functions for handling identifiers for libraries and related
+#           organizations
+#
+# Copyright (C) 2011 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""Module for handling ISIL (International Standard Identifier for
+Libraries and Related Organizations).
+
+>>> is_valid('IT-RM0267')
+True
+>>> is_valid('OCLC-DLC')
+True
+>>> is_valid('WW-RM0267') # unregistered country code
+False
+>>> format('it-RM0267')
+'IT-RM0267'
+"""
+
+# the valid characters in an ISIL
+_alphabet = set('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-:/')
+
+def compact(number):
+    """Convert the ISIL to the minimal representation. This strips
+    surrounding whitespace."""
+    return number.strip()
+
+def _known_agency(agency):
+    """Checks whether the specified agency is valid."""
+    # look it up in the db
+    from stdnum import numdb
+    results = numdb.get('isil').info(agency.upper() + '$')
+    # there should be only one part and it should have properties
+    return len(results) == 1 and bool(results[0][1])
+
+def is_valid(number):
+    """Checks to see if the number provided is a valid isil (or isilSV)
+    number."""
+    try:
+        number = compact(number)
+    except:
+        return False
+    for n in number:
+        if n not in _alphabet:
+            return False
+    return len(number) <= 15 and _known_agency(number.split('-')[0])
+
+def format(number):
+    """Reformat the passed number to the standard format."""
+    number = compact(number)
+    parts = number.split('-')
+    if len(parts) > 1 and _known_agency(parts[0]):
+        parts[0] = parts[0].upper()
+    return '-'.join(parts)

Modified: python-stdnum/tests/test_robustness.doctest
==============================================================================
--- python-stdnum/tests/test_robustness.doctest Sat Feb  5 23:32:01 2011        (r60)
+++ python-stdnum/tests/test_robustness.doctest Sat Feb  5 23:43:44 2011        (r61)
@@ -24,7 +24,7 @@
 
 >>> testvalues = ( None, '*&^%$', '', 0, False, object(),  )
 
->>> from stdnum import grid, iban, imei, isan, isbn, ismn, issn
+>>> from stdnum import grid, iban, imei, isan, isbn, isil, ismn, issn
 >>> from stdnum import luhn, meid, verhoeff
 >>> from stdnum.iso7064 import mod_11_10, mod_11_2, mod_37_2, mod_37_36, mod_97_10
 >>> from stdnum.nl import bsn
-- 
To unsubscribe send an email to
python-stdnum-commits-unsubscribe@lists.arthurdejong.org or see
http://lists.arthurdejong.org/python-stdnum-commits