
webcheck commit: r427 - in webcheck: . parsers parsers/html plugins


Author: arthur
Date: Thu Aug 18 23:22:26 2011
New Revision: 427
URL: http://arthurdejong.org/viewvc/webcheck?view=rev&revision=427

Log:
make source code changes to follow PEP8 more
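
For reference, the snippet below is an illustrative sketch (not part of the commit itself) of the recurring PEP8 adjustments applied throughout this revision: no padding spaces inside brackets, spaces around operators, "key in dict" instead of the deprecated dict.has_key(key), inline comments separated by two spaces, and two blank lines before top-level definitions.

    # before (style replaced in this revision)
    PLUGINS = [ 'anchors', 'sitemap' ]
    if _parsermodules.has_key(mimetype):
        return _parsermodules[mimetype]
    def parse(content, link):
        pass # TODO

    # after (PEP8 style as applied here)
    PLUGINS = ['anchors', 'sitemap']
    if mimetype in _parsermodules:
        return _parsermodules[mimetype]


    def parse(content, link):
        pass  # TODO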

Modified:
   webcheck/config.py
   webcheck/crawler.py
   webcheck/db.py
   webcheck/debugio.py
   webcheck/monkeypatch.py
   webcheck/myurllib.py
   webcheck/parsers/__init__.py
   webcheck/parsers/css.py
   webcheck/parsers/html/__init__.py
   webcheck/parsers/html/beautifulsoup.py
   webcheck/parsers/html/calltidy.py
   webcheck/parsers/html/htmlparser.py
   webcheck/plugins/__init__.py
   webcheck/plugins/about.py
   webcheck/plugins/anchors.py
   webcheck/plugins/badlinks.py
   webcheck/plugins/external.py
   webcheck/plugins/images.py
   webcheck/plugins/new.py
   webcheck/plugins/notchkd.py
   webcheck/plugins/notitles.py
   webcheck/plugins/old.py
   webcheck/plugins/problems.py
   webcheck/plugins/sitemap.py
   webcheck/plugins/size.py
   webcheck/plugins/urllist.py
   webcheck/webcheck.py

Modified: webcheck/config.py
==============================================================================
--- webcheck/config.py  Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/config.py  Thu Aug 18 23:22:26 2011        (r427)
@@ -64,19 +64,19 @@
 REDIRECT_DEPTH = 5
 
 # The list of plugins that will be used to generate the report.
-PLUGINS = [ 'anchors',
-            'sitemap',
-            'urllist',
-            'images',
-            'external',
-            'notchkd',
-            'badlinks',
-            'old',
-            'new',
-            'size',
-            'notitles',
-            'problems',
-            'about' ]
+PLUGINS = ['anchors',
+           'sitemap',
+           'urllist',
+           'images',
+           'external',
+           'notchkd',
+           'badlinks',
+           'old',
+           'new',
+           'size',
+           'notitles',
+           'problems',
+           'about']
 
 # Whether to overwrite files without asking. This is the state of the -f
 # command line option.
@@ -107,11 +107,11 @@
 
 # A list of names that will be checked when encountering an file:///
 # directory. This file will be picked up instead of the directory list.
-FILE_INDEXES = [ 'index.html', 'index.htm' ]
+FILE_INDEXES = ['index.html', 'index.htm']
 
 # A list of names that will be checked when encountering an ftp://
 # directory. This file will be picked up instead of the directory list.
-FTP_INDEXES = [ 'index.html', 'index.htm' ]
+FTP_INDEXES = ['index.html', 'index.htm']
 
 # Whether to fetch robots.txt files and do checking based on the information
 # present in those files (normally matching links are yanked).

Modified: webcheck/crawler.py
==============================================================================
--- webcheck/crawler.py Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/crawler.py Thu Aug 18 23:22:26 2011        (r427)
@@ -70,7 +70,8 @@
         pass
     atexit.register(cookiejar.save, ignore_discard=False, ignore_expires=False)
     # set up our custom opener that sets a meaningful user agent
-    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar), NoRedirectHandler())
+    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar),
+                                  NoRedirectHandler())
     opener.addheaders = [
       ('User-agent', 'webcheck %s' % config.VERSION),
       ]
@@ -147,7 +148,7 @@
         if config.BASE_URLS_ONLY:
             # the url must start with one of the _internal_urls
             for i in self._internal_urls:
-                res |= (i==url[:len(i)])
+                res |= (i == url[:len(i)])
         else:
             # the netloc must match a netloc of an _internal_url
             netloc = urlparse.urlsplit(url)[1]
@@ -169,18 +170,19 @@
         netloc."""
         # only some schemes have a meaningful robots.txt file
         if scheme != 'http' and scheme != 'https':
-            debugio.debug('crawler._get_robotparser() called with unsupported scheme (%s)' % scheme)
+            debugio.debug('crawler._get_robotparser() '
+                          'called with unsupported scheme (%s)' % scheme)
             return None
         # split out the key part of the url
         location = urlparse.urlunsplit((scheme, netloc, '', '', ''))
         # try to create a new robotparser if we don't already have one
-        if not self._robotparsers.has_key(location):
+        if location not in self._robotparsers:
             debugio.info('  getting robots.txt for %s' % location)
             self._robotparsers[location] = None
             try:
                 rp = robotparser.RobotFileParser()
                 rp.set_url(urlparse.urlunsplit(
-                  (scheme, netloc, '/robots.txt', '', '') ))
+                  (scheme, netloc, '/robots.txt', '', '')))
                 rp.read()
                 self._robotparsers[location] = rp
             except (TypeError, IOError, httplib.HTTPException):
@@ -277,7 +279,8 @@
             session.commit()
             # sleep between requests if configured
             if config.WAIT_BETWEEN_REQUESTS > 0:
-                debugio.debug('crawler.crawl(): sleeping %s seconds' % config.WAIT_BETWEEN_REQUESTS)
+                debugio.debug('crawler.crawl(): sleeping %s seconds' %
+                              config.WAIT_BETWEEN_REQUESTS)
                 time.sleep(config.WAIT_BETWEEN_REQUESTS)
             debugio.debug('crawler.crawl(): items left to check: %d' % len(tocheck))
         session.commit()

Modified: webcheck/db.py
==============================================================================
--- webcheck/db.py      Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/db.py      Thu Aug 18 23:22:26 2011        (r427)
@@ -177,7 +177,7 @@
         if self.anchors.filter(Anchor.anchor == anchor).first():
             self.add_pageproblem(
               'anchor/id "%(anchor)s" defined multiple times'
-              % { 'anchor':   anchor })
+              % {'anchor': anchor})
         else:
             self.anchors.append(Anchor(anchor=anchor))
 

Modified: webcheck/debugio.py
==============================================================================
--- webcheck/debugio.py Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/debugio.py Thu Aug 18 23:22:26 2011        (r427)
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -32,30 +32,34 @@
 
 # log levels that can be used
 ERROR = 0
-WARN  = 1
-INFO  = 2
+WARN = 1
+INFO = 2
 DEBUG = 3
 
 # initialize logging at default level
 loglevel = INFO
 
+
 def debug(msg):
     """Log the message to stderr if loglevel will allow it."""
     if loglevel >= DEBUG:
-        sys.stderr.write('webcheck: DEBUG: '+str(msg)+'\n')
+        sys.stderr.write('webcheck: DEBUG: ' + str(msg) + '\n')
+
 
 def info(msg):
     """Log the message to stdout if loglevel will allow it."""
     if loglevel >= INFO:
-        sys.stdout.write('webcheck: '+str(msg)+'\n')
+        sys.stdout.write('webcheck: ' + str(msg) + '\n')
         sys.stdout.flush()
 
+
 def warn(msg):
     """Log a warning to stderr if loglevel will allow it."""
     if loglevel >= WARN:
-        sys.stderr.write('webcheck: Warning: '+str(msg)+'\n')
+        sys.stderr.write('webcheck: Warning: ' + str(msg) + '\n')
+
 
 def error(msg):
     """Log an error to stderr if loglevel will allow it."""
     if loglevel >= ERROR:
-        sys.stderr.write('webcheck: Error: '+str(msg)+'\n')
+        sys.stderr.write('webcheck: Error: ' + str(msg) + '\n')

Modified: webcheck/monkeypatch.py
==============================================================================
--- webcheck/monkeypatch.py     Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/monkeypatch.py     Thu Aug 18 23:22:26 2011        (r427)
@@ -28,6 +28,7 @@
 
 __all__ = []
 
+
 # This monkeypatches RuleLine.applies_to to support * and $ characters in
 # robots.txt path names.
 def my_applies_to(ruleline, filename):

Modified: webcheck/myurllib.py
==============================================================================
--- webcheck/myurllib.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/myurllib.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -54,6 +54,7 @@
 # pattern for leading dots
 _leadingdotpattern = re.compile('^(/\.\.)*')
 
+
 def _unescape_printable(match):
     """Helper function for _normalize_escapes() to perform the expansion of
     html entity refs that are normal printable (but not reserver)
@@ -65,6 +66,7 @@
     # transform remaining escapes to uppercase
     return match.group(1).upper()
 
+
 def _normalize_escapes(url):
     """Ensure that escaping in the url is consistent. Any reserved characters
     are left alone. Any characters that are printable but are escaped are
@@ -75,9 +77,10 @@
     # url encode any nonprintable or problematic characters (but not reserved
     # characters) so we're left with a string with everything that needs to be
     # quoted as such
-    url = _urlprobpattern.sub(lambda x:'%%%02X' % ord(x.group(1)), url)
+    url = _urlprobpattern.sub(lambda x: '%%%02X' % ord(x.group(1)), url)
     return url
 
+
 def _urlclean(url):
     """Clean the url of uneccesary parts."""
     # make escaping consistent
@@ -85,9 +88,9 @@
     # split the url in useful parts
     (scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
     # remove any leading /../ parts
-    if scheme in ( 'http', 'https' ):
+    if scheme in ('http', 'https'):
         path = _leadingdotpattern.sub('', path)
-    if scheme in ( 'http', 'https', 'ftp' ):
+    if scheme in ('http', 'https', 'ftp'):
         # http(s) urls should have a non-empty path
         if path == '':
             path = '/'
@@ -104,13 +107,14 @@
         if netloc[-1:] == ':':
             netloc = netloc[:-1]
         if userpass is not None:
-            netloc = userpass+'@'+netloc
+            netloc = userpass + '@' + netloc
     # get rid of double slashes in some paths
-    if ( scheme == 'file' ):
+    if scheme == 'file':
         path = _doubleslashpattern.sub('/', path)
     # put the url back together again
     return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
 
+
 def normalizeurl(url):
     """Return a normalized URL."""
     return _urlclean(url)

Modified: webcheck/parsers/__init__.py
==============================================================================
--- webcheck/parsers/__init__.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/parsers/__init__.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -1,7 +1,7 @@
 
 # __init__.py - general content-type parser interface
 #
-# Copyright (C) 2005, 2006 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -34,24 +34,27 @@
 # a map of mimetypes to modules
 _parsermodules = {}
 
+
 def _init_modules():
     """Initialize the modules."""
     # go throught all known modules to probe the content-types
     # (do this only once)
     for mod in _modules:
-        parser = __import__('parsers.'+mod, globals(), locals(), [mod])
+        parser = __import__('parsers.' + mod, globals(), locals(), [mod])
         for mimetype in parser.mimetypes:
             _parsermodules[mimetype] = parser
 
+
 def get_parsermodule(mimetype):
     """Look up the correct module for the specified mimetype."""
     if _parsermodules == {}:
         _init_modules()
     # check if we have a supported content-type
-    if _parsermodules.has_key(mimetype):
+    if mimetype in _parsermodules:
         return _parsermodules[mimetype]
     return None
 
+
 def get_mimetypes():
     """Return a list of supported mime types that can be parsed
     by the installed parsers."""

Modified: webcheck/parsers/css.py
==============================================================================
--- webcheck/parsers/css.py     Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/parsers/css.py     Thu Aug 18 23:22:26 2011        (r427)
@@ -30,15 +30,16 @@
 import re
 
 # pattern for matching /* ... */ comments in css
-_commentpattern = re.compile('/\*.*?\*/', re.IGNORECASE|re.DOTALL)
+_commentpattern = re.compile('/\*.*?\*/', re.IGNORECASE | re.DOTALL)
 
 # pattern for matching @import "url" statments in css
 _importpattern = re.compile('@import\s+["\']([^"\']*)["\']',
-                            re.IGNORECASE|re.DOTALL)
+                            re.IGNORECASE | re.DOTALL)
 
 # pattern for matching url(...) in css
 _urlpattern = re.compile('url\(["\']?(.*?)["\']?\)')
 
+
 def parse(content, link, base=None):
     """Parse the specified content and extract information for crawling the
     site further."""

Modified: webcheck/parsers/html/__init__.py
==============================================================================
--- webcheck/parsers/html/__init__.py   Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/parsers/html/__init__.py   Thu Aug 18 23:22:26 2011        (r427)
@@ -35,6 +35,7 @@
 # pattern for matching all html entities
 _entitypattern = re.compile('&(#[0-9]{1,6}|[a-zA-Z]{2,10});')
 
+
 def htmlescape(txt, inattr=False):
     """HTML escape the given string and return an ASCII clean string with
     known entities and character entities for the other values.
@@ -54,21 +55,22 @@
                 out += '&%s;' % htmlentitydefs.codepoint2name[ord(c)]
             else:
                 out += '"'
-        elif htmlentitydefs.codepoint2name.has_key(ord(c)):
+        elif ord(c) in htmlentitydefs.codepoint2name:
             out += '&%s;' % htmlentitydefs.codepoint2name[ord(c)]
         elif ord(c) > 126:
-            out += '&#%d;'% ord(c)
+            out += '&#%d;' % ord(c)
         elif inattr and c == u'\n':
             out += '&#10;'
         else:
             out += c.encode('utf-8')
     return out
 
+
 def _unescape_entity(match):
     """Helper function for htmlunescape().
     This funcion unescapes a html entity, it is passed to the sub()
     function."""
-    if htmlentitydefs.name2codepoint.has_key(match.group(1)):
+    if match.group(1) in htmlentitydefs.name2codepoint:
         # we have a named entity, return proper character
         return unichr(htmlentitydefs.name2codepoint[match.group(1)])
     elif match.group(1)[0] == '#':
@@ -78,6 +80,7 @@
         # we have something else, just keep the original
         return match.group(0)
 
+
 def htmlunescape(txt):
     """This function unescapes a html encoded string.
     This function returns a unicode string."""
@@ -92,6 +95,7 @@
     # we're done
     return txt
 
+
 def _parsefunction(content, link):
     # we find a suitable parse function
     global _parsefunction
@@ -102,12 +106,14 @@
         _parsefunction = parsers.html.beautifulsoup.parse
     except ImportError:
         # fall back to legacy HTMLParser parser
-        debugio.warn('falling back to the legacy HTML parser, consider installing BeautifulSoup')
+        debugio.warn('falling back to the legacy HTML parser, '
+                     'consider installing BeautifulSoup')
         import parsers.html.htmlparser
         _parsefunction = parsers.html.htmlparser.parse
     # call the actual parse function
     _parsefunction(content, link)
 
+
 def parse(content, link):
     """Parse the specified content and extract an url list, a list of images a
     title and an author. The content is assumed to contain HMTL."""

Modified: webcheck/parsers/html/beautifulsoup.py
==============================================================================
--- webcheck/parsers/html/beautifulsoup.py      Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/parsers/html/beautifulsoup.py      Thu Aug 18 23:22:26 2011        (r427)
@@ -40,7 +40,9 @@
 # check BeautifulSoup find() function for bugs
 if BeautifulSoup.BeautifulSoup('<foo>').find('foo', bar=True):
     import debugio
-    debugio.warn('using buggy version of BeautifulSoup (%s)' % BeautifulSoup.__version__)
+    debugio.warn('using buggy version of BeautifulSoup (%s)' %
+                 BeautifulSoup.__version__)
+
 
 def parse(content, link):
     """Parse the specified content and extract an url list, a list of images a
@@ -67,21 +69,24 @@
         base = link.url
     # <link rel="TYPE" href="URL">
     for l in soup.findAll('link', rel=True, href=True):
-        if l['rel'].lower() in ('stylesheet', 'alternate stylesheet', 'icon', 'shortcut icon'):
+        if l['rel'].lower() in ('stylesheet', 'alternate stylesheet', 'icon',
+                                'shortcut icon'):
             embed = myurllib.normalizeurl(htmlunescape(l['href']).strip())
             if embed:
                 link.add_embed(urlparse.urljoin(base, embed))
     # <meta name="author" content="AUTHOR">
-    author = soup.find('meta', attrs={'name': re.compile("^author$", re.I), 'content': True})
+    author = soup.find('meta', attrs={'name': re.compile("^author$", re.I),
+                                      'content': True})
     if author and author['content']:
         link.author = htmlunescape(author['content']).strip()
     # <meta http-equiv="refresh" content="0;url=URL">
-    refresh = soup.find('meta', attrs={'http-equiv': _refreshhttpequivpattern, 'content': True})
+    refresh = soup.find('meta', attrs={'http-equiv': _refreshhttpequivpattern,
+                                       'content': True})
     if refresh and refresh['content']:
         try:
             child = _refershcontentpattern.search(refresh['content']).group(1)
         except AttributeError:
-            pass # ignore cases where refresh header parsing causes problems
+            pass  # ignore cases where refresh header parsing causes problems
         else:
             link.add_child(urlparse.urljoin(base, child))
     # <img src="URL">
@@ -100,7 +105,8 @@
         # get anchor name
         a_name = myurllib.normalizeurl(htmlunescape(a['name']).strip())
         # if both id and name are used they should be the same
-        if a.has_key('id') and a_name != myurllib.normalizeurl(htmlunescape(a['id']).strip()):
+        if 'id' in a and \
+           a_name != myurllib.normalizeurl(htmlunescape(a['id']).strip()):
             link.add_pageproblem(
               'anchors defined in name and id attributes do not match')
             # add the id anchor anyway
@@ -110,7 +116,7 @@
     # <ANY id="ID">
     for elem in soup.findAll(id=True):
         # skip anchor that have a name
-        if elem.name == 'a' and elem.has_key('name'):
+        if elem.name == 'a' and 'name' in elem:
             continue
         # add the anchor
         link.add_anchor(myurllib.normalizeurl(htmlunescape(elem['id']).strip()))
@@ -142,7 +148,7 @@
     # <applet code="URL" [archive="URL"]...>
     for applet in soup.findAll('applet', code=True):
         # if applet has archive tag check that
-        if applet.has_key('archive'):
+        if 'archive' in applet:
             embed = myurllib.normalizeurl(htmlunescape(applet['archive']).strip())
         else:
             embed = myurllib.normalizeurl(htmlunescape(applet['code']).strip())
@@ -154,7 +160,9 @@
         if embed:
             link.add_embed(urlparse.urljoin(base, embed))
     # <embed><param name="movie" value="url"></embed>
-    for param in soup.findAll('param', attrs={'name': re.compile("^movie$", re.I), 'value': True}):
+    for param in soup.findAll('param', attrs={
+                  'name': re.compile("^movie$", re.I),
+                  'value': True}):
         embed = myurllib.normalizeurl(htmlunescape(param['value']).strip())
         if embed:
             link.add_embed(urlparse.urljoin(base, embed))
@@ -175,7 +183,7 @@
         if embed:
             link.add_embed(urlparse.urljoin(base, embed))
     # <body|table|td background="url">
-    for t in soup.findAll( ('body', 'table', 'td'), background=True):
+    for t in soup.findAll(('body', 'table', 'td'), background=True):
         embed = myurllib.normalizeurl(htmlunescape(t['background']).strip())
         if embed:
             link.add_embed(urlparse.urljoin(base, embed))

Modified: webcheck/parsers/html/calltidy.py
==============================================================================
--- webcheck/parsers/html/calltidy.py   Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/parsers/html/calltidy.py   Thu Aug 18 23:22:26 2011        (r427)
@@ -24,6 +24,7 @@
 import config
 import parsers.html
 
+
 def parse(content, link):
     """Parse the specified content with tidy and add any errors to the
     link."""

Modified: webcheck/parsers/html/htmlparser.py
==============================================================================
--- webcheck/parsers/html/htmlparser.py Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/parsers/html/htmlparser.py Thu Aug 18 23:22:26 2011        (r427)
@@ -45,6 +45,7 @@
 # pattern for matching the encoding part of an xml declaration
 _encodingpattern = re.compile('^xml .*encoding="([^"]*)"', re.I)
 
+
 class _MyHTMLParser(HTMLParser.HTMLParser):
     """A simple subclass of HTMLParser.HTMLParser continuing after errors
     and gathering some information from the parsed content."""
@@ -81,9 +82,9 @@
         # (characters are escaped in myurllib.normalizeurl())
         if _spacepattern.search(url):
             self.link.add_pageproblem(
-              what+' contains unescaped spaces: '+url+', '+self._location() )
+              what + ' contains unescaped spaces: ' + url + ', ' + self._location())
         # replace &#nnn; entity refs with proper characters
-        url = _charentitypattern.sub(lambda x:chr(int(x.group(1))), url)
+        url = _charentitypattern.sub(lambda x: chr(int(x.group(1))), url)
         return myurllib.normalizeurl(url)
 
     def error(self, message):
@@ -91,7 +92,7 @@
         # construct error message
         message += ', ' + self._location()
         # store error message
-        debugio.debug('parsers.html.htmlparser._MyHTMLParser.error(): problem parsing html: '+message)
+        debugio.debug('parsers.html.htmlparser._MyHTMLParser.error(): problem parsing html: ' + message)
         if self.errmsg is None:
             self.errmsg = message
         # increment error count
@@ -115,48 +116,48 @@
         if tag == 'title':
             self.collect = ''
         # <base href="URL">
-        elif tag == 'base' and attrs.has_key('href'):
+        elif tag == 'base' and 'href' in attrs:
             self.base = self._cleanurl(attrs['href'])
         # <link rel="type" href="URL">
-        elif tag == 'link' and attrs.has_key('rel') and attrs.has_key('href'):
+        elif tag == 'link' and 'rel' in attrs and 'href' in attrs:
             if attrs['rel'].lower() in ('stylesheet', 'alternate stylesheet', 'icon', 'shortcut icon'):
                 self.embedded.append(self._cleanurl(attrs['href']))
         # <meta name="author" content="AUTHOR">
-        elif tag == 'meta' and attrs.has_key('name') and attrs.has_key('content') and attrs['name'].lower() == 'author':
+        elif tag == 'meta' and 'name' in attrs and 'content' in attrs and attrs['name'].lower() == 'author':
             if self.author is None:
                 self.author = attrs['content']
         # <meta http-equiv="refresh" content="0;url=URL">
-        elif tag == 'meta' and attrs.has_key('http-equiv') and attrs.has_key('content') and attrs['http-equiv'].lower() == 'refresh':
-            pass # TODO: implement
+        elif tag == 'meta' and 'http-equiv' in attrs and 'content' in attrs and attrs['http-equiv'].lower() == 'refresh':
+            pass  # TODO: implement
         # <meta http-equiv="content-type" content="text/html; charset=utf-8" />
-        elif tag == 'meta' and attrs.has_key('http-equiv') and attrs.has_key('content') and attrs['http-equiv'].lower() == 'content-type':
+        elif tag == 'meta' and 'http-equiv' in attrs and 'content' in attrs and attrs['http-equiv'].lower() == 'content-type':
             try:
                 self.link.set_encoding(_charsetpattern.search(attrs['content']).group(1))
             except AttributeError:
                 # ignore cases where encoding is not set in header
                 pass
         # <img src="url">
-        elif tag == 'img' and attrs.has_key('src'):
+        elif tag == 'img' and 'src' in attrs:
             self.embedded.append(self._cleanurl(attrs['src']))
         # <a href="url" name="anchor" id="anchor">
         elif tag == 'a':
             # <a href="url">
-            if attrs.has_key('href'):
+            if 'href' in attrs:
                 self.children.append(self._cleanurl(attrs['href']))
             # <a name="anchor">
             a_name = None
-            if attrs.has_key('name'):
+            if 'name' in attrs:
                 a_name = self._cleanurl(attrs['name'], 'anchor')
             # <a id="anchor">
             a_id = None
-            if attrs.has_key('id'):
+            if 'id' in attrs:
                 a_id = self._cleanurl(attrs['id'], 'anchor')
             # if both id and name are used they should be the same
             if a_id and a_name and a_id != a_name:
                 # add problem
                 self.link.add_pageproblem(
                   'anchors defined in name and id attributes do not match %(location)s'
-                  % { 'location': self._location() })
+                  % {'location': self._location()})
             elif a_id == a_name:
                 # ignore id if it's the same as name
                 a_id = None
@@ -165,8 +166,8 @@
                 if a_name in self.anchors:
                     self.link.add_pageproblem(
                       'anchor "%(anchor)s" defined again %(location)s'
-                      % { 'anchor':   a_name,
-                          'location': self._location() })
+                      % {'anchor':   a_name,
+                         'location': self._location()})
                 else:
                     self.anchors.append(a_name)
             # <a id="anchor">
@@ -174,40 +175,40 @@
                 if a_id in self.anchors:
                     self.link.add_pageproblem(
                       'anchor "%(anchor)s" defined again %(location)s'
-                      % { 'anchor':   a_id,
-                          'location': self._location() })
+                      % {'anchor':   a_id,
+                         'location': self._location()})
                 else:
                     self.anchors.append(a_id)
         # <frameset><frame src="url"...>...</frameset>
-        elif tag == 'frame' and attrs.has_key('src'):
+        elif tag == 'frame' and 'src' in attrs:
             self.embedded.append(self._cleanurl(attrs['src']))
         # <map><area href="url"...>...</map>
-        elif tag == 'area' and attrs.has_key('href'):
+        elif tag == 'area' and 'href' in attrs:
             self.children.append(self._cleanurl(attrs['href']))
         # <applet archive="URL"...>
-        elif tag == 'applet' and attrs.has_key('archive'):
+        elif tag == 'applet' and 'archive' in attrs:
             self.embedded.append(self._cleanurl(attrs['archive']))
         # <applet code="URL"...>
-        elif tag == 'applet' and attrs.has_key('code'):
+        elif tag == 'applet' and 'code' in attrs:
             self.embedded.append(self._cleanurl(attrs['code']))
         # <embed src="url"...>
-        elif tag == 'embed' and attrs.has_key('src'):
+        elif tag == 'embed' and 'src' in attrs:
             self.embedded.append(self._cleanurl(attrs['src']))
         # <embed><param name="movie" value="url"></embed>
-        elif tag == 'param' and attrs.has_key('name') and attrs.has_key('value'):
+        elif tag == 'param' and 'name' in attrs and 'value' in attrs:
             if attrs['name'].lower() == 'movie':
                 self.embedded.append(self._cleanurl(attrs['value']))
         # <style>content</style>
         elif tag == 'style':
             self.collect = ''
         # <script src="url">
-        elif tag == 'script' and attrs.has_key('src'):
+        elif tag == 'script' and 'src' in attrs:
             self.embedded.append(self._cleanurl(attrs['src']))
         # <body|table|td background="url">
-        elif tag in ('body', 'table', 'td') and attrs.has_key('background'):
+        elif tag in ('body', 'table', 'td') and 'background' in attrs:
             self.embedded.append(self._cleanurl(attrs['background']))
         # pick up any tags with a style attribute
-        if attrs.has_key('style'):
+        if 'style' in attrs:
             # delegate handling of inline css to css module
             import parsers.css
             parsers.css.parse(attrs['style'], self.link, self.base)
@@ -230,13 +231,13 @@
     def handle_charref(self, name):
         """Handle character references (e.g. &#65;) by passing the data to
         handle_data()."""
-        self.handle_data('&#'+name+';')
+        self.handle_data('&#' + name + ';')
         # TODO: do not pass ; if plain text does not contain it?
 
     def handle_entityref(self, name):
         """Handle entity references (e.g. &eacute;) by passing the data to
         handle_data()."""
-        self.handle_data('&'+name+';')
+        self.handle_data('&' + name + ';')
         # TODO: do not pass ; if plain text does not contain it?
 
     def handle_pi(self, data):
@@ -247,6 +248,7 @@
         except AttributeError:
             pass
 
+
 def _maketxt(txt, encoding):
     """Return an unicode text of the specified string do correct character
     conversions and replacing html entities with normal characters."""
@@ -259,6 +261,7 @@
     # fall back to locale's encoding
     return htmlunescape(unicode(txt, errors='replace'))
 
+
 def parse(content, link):
     """Parse the specified content and extract an url list, a list of images a
     title and an author. The content is assumed to contain HMTL."""

Modified: webcheck/plugins/__init__.py
==============================================================================
--- webcheck/plugins/__init__.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/__init__.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -54,12 +54,14 @@
 # reference function from html module
 htmlescape = parsers.html.htmlescape
 
+
 def get_title(link):
     """Returns the title of a link if it is set otherwise returns url."""
     if link.title is None or link.title == '':
         return link.url
     return link.title
 
+
 def _floatformat(f):
     """Return a float as a string while trying to keep it within three
     characters."""
@@ -69,26 +71,29 @@
         txt = txt[:txt.find('.')]
     return txt
 
+
 def get_size(i):
     """Return the size in bytes as a readble string."""
     K = 1024
-    M = K*1024
-    G = M*1024
-    if i > 1024*1024*999:
-        return _floatformat(float(i)/float(G))+'G'
-    elif i > 1024*999:
-        return _floatformat(float(i)/float(M))+'M'
+    M = K * 1024
+    G = M * 1024
+    if i > 1024 * 1024 * 999:
+        return _floatformat(float(i) / float(G)) + 'G'
+    elif i > 1024 * 999:
+        return _floatformat(float(i) / float(M)) + 'M'
     elif i >= 1024:
-        return _floatformat(float(i)/float(K))+'K'
+        return _floatformat(float(i) / float(K)) + 'K'
     else:
         return '%d' % i
 
+
 def _mk_unicode(txt):
     """Returns a unicode instance of the string."""
     if not isinstance(txt, unicode):
         txt = unicode(txt)
     return txt
 
+
 def get_info(link):
     """Return a string with a summary of the information in the link."""
     info = u'url: %s\n' % _mk_unicode(link.url)
@@ -133,6 +138,7 @@
     # trim trailing newline
     return info.strip()
 
+
 def make_link(link, title=None):
     """Return an <a>nchor to a url with title. If url is in the Linklist and
     is external, insert "class=external" in the <a> tag."""
@@ -147,7 +153,13 @@
     if config.REPORT_LINKS_IN_NEW_WINDOW:
         target = 'target="_blank" '
     # gather some information about the link to report
-    return '<a href="'+htmlescape(link.url, True)+'" '+target+'class="'+cssclass+'" title="'+htmlescape(get_info(link), True)+'">'+htmlescape(title)+'</a>'
+    return '<a href="%(url)s" %(target)sclass="%(cssclass)s" title="%(info)s">%(title)s</a>' % \
+            dict(url=htmlescape(link.url, True),
+                 target=target,
+                 cssclass=cssclass,
+                 info=htmlescape(get_info(link), True),
+                 title=htmlescape(title))
+
 
 def print_parents(fp, link, indent='     '):
     """Write a list of parents to the output file descriptor.
@@ -158,24 +170,25 @@
         return
     parents.sort(lambda a, b: cmp(a.title, b.title) or cmp(a.url, b.url))
     fp.write(
-      indent+'<div class="parents">\n'+
-      indent+' referenced from:\n'+
-      indent+' <ul>\n' )
+      indent + '<div class="parents">\n' +
+      indent + ' referenced from:\n' +
+      indent + ' <ul>\n')
     more = 0
     if len(parents) > config.PARENT_LISTLEN + 1:
         more = len(parents) - config.PARENT_LISTLEN
         parents = parents[:config.PARENT_LISTLEN]
     for parent in parents:
         fp.write(
-          indent+'  <li>%(parent)s</li>\n'
-          % { 'parent': make_link(parent) })
+          indent + '  <li>%(parent)s</li>\n'
+          % {'parent': make_link(parent)})
     if more:
         fp.write(
-          indent+'  <li>%(more)d more...</li>\n'
-          % { 'more': more })
+          indent + '  <li>%(more)d more...</li>\n'
+          % {'more': more})
     fp.write(
-      indent+' </ul>\n'+
-      indent+'</div>\n' )
+      indent + ' </ul>\n' +
+      indent + '</div>\n')
+
 
 def open_file(filename, istext=True, makebackup=False):
     """This returns an open file object which can be used for writing. This
@@ -189,8 +202,8 @@
             os.mkdir(config.OUTPUT_DIR)
         except OSError, (errno, strerror):
             debugio.error('error creating directory %(dir)s: %(strerror)s' %
-                          { 'dir': config.OUTPUT_DIR,
-                            'strerror': strerror })
+                          {'dir': config.OUTPUT_DIR,
+                           'strerror': strerror})
             sys.exit(1)
     # build the output file name
     fname = os.path.join(config.OUTPUT_DIR, filename)
@@ -198,7 +211,7 @@
     if os.path.exists(fname):
         if makebackup:
             # create backup of original (overwriting previous backup)
-            os.rename(fname, fname+'~')
+            os.rename(fname, fname + '~')
         elif not config.OVERWRITE_FILES:
             # ask to overwrite
             try:
@@ -221,10 +234,11 @@
             return open(fname, 'wb')
     except IOError, (errno, strerror):
         debugio.error('error creating output file %(fname)s: %(strerror)s' %
-                      { 'fname': fname,
-                        'strerror': strerror })
+                      {'fname': fname,
+                       'strerror': strerror})
         sys.exit(1)
 
+
 def _print_navbar(fp, plugin):
     """Return an html fragement representing the navigation bar for a page."""
     fp.write('  <ul class="navbar">\n')
@@ -240,12 +254,13 @@
             selected = ' class="selected"'
         fp.write(
           '   <li><a href="%(pluginfile)s"%(selected)s title="%(description)s">%(title)s</a></li>\n'
-          % { 'pluginfile' : report.__outputfile__,
-              'selected'   : selected,
-              'title'      : htmlescape(report.__title__),
-              'description': htmlescape(report.__doc__) })
+          % {'pluginfile':  report.__outputfile__,
+             'selected':    selected,
+             'title':       htmlescape(report.__title__),
+             'description': htmlescape(report.__doc__)})
     fp.write('  </ul>\n')
 
+
 def open_html(plugin, site):
     """Print an html fragment for the start of an html page."""
     # open the file
@@ -268,10 +283,10 @@
       ' </head>\n'
       ' <body>\n'
       '  <h1 class="basename">Webcheck report for <a href="%(siteurl)s">%(sitetitle)s</a></h1>\n'
-      % { 'sitetitle':  htmlescape(get_title(base)),
-          'plugintitle': htmlescape(plugin.__title__),
-          'siteurl':    base.url,
-          'version':    config.VERSION })
+      % {'sitetitle':   htmlescape(get_title(base)),
+         'plugintitle': htmlescape(plugin.__title__),
+         'siteurl':     base.url,
+         'version':     config.VERSION})
     # write navigation bar
     _print_navbar(fp, plugin)
     # write plugin heading
@@ -280,6 +295,7 @@
     fp.write('  <div class="content">\n')
     return fp
 
+
 def close_html(fp):
     """Print an html fragment as footer of an html page."""
     fp.write('  </div>\n')
@@ -290,11 +306,12 @@
       '  </p>\n'
       ' </body>\n'
       '</html>\n'
-      % { 'time':     htmlescape(time.ctime(time.time())),
-          'homepage': config.HOMEPAGE,
-          'version':  htmlescape(config.VERSION) })
+      % {'time':     htmlescape(time.ctime(time.time())),
+         'homepage': config.HOMEPAGE,
+         'version':  htmlescape(config.VERSION)})
     fp.close()
 
+
 def generate(site):
     """Generate pages for plugins."""
     for p in config.PLUGINS:

Modified: webcheck/plugins/about.py
==============================================================================
--- webcheck/plugins/about.py   Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/about.py   Thu Aug 18 23:22:26 2011        (r427)
@@ -55,10 +55,10 @@
       '    This report was generated on %(time)s, a total of %(numurls)d\n'
       '    links were found.\n'
       '   </p>\n\n'
-      % { 'version':  plugins.htmlescape(config.VERSION),
-          'time':     plugins.htmlescape(time.ctime(time.time())),
-          'numurls':  site.links.count(),
-          'homepage': config.HOMEPAGE } )
+      % {'version':  plugins.htmlescape(config.VERSION),
+         'time':     plugins.htmlescape(time.ctime(time.time())),
+         'numurls':  site.links.count(),
+         'homepage': config.HOMEPAGE})
     # output copyright information
     fp.write(
       '   <h3>Copyright</h3>\n'
@@ -94,7 +94,7 @@
       '    Copyright &copy; 2003-2005 Stuart Langridge, Paul McLanahan,\n'
       '    Peter Janes, Brad Choate, Dunstan Orchard, Ethan Marcotte,\n'
       '    Mark Wubben and Victor Kulinski\n'
-      '   </p>\n\n' )
+      '   </p>\n\n')
     # output plugin information
     fp.write(
       '   <h3>Plugins</h3>\n'
@@ -104,10 +104,10 @@
         fp.write(
           '    <li>\n'
           '     <strong>%s</strong><br />\n'
-          % plugins.htmlescape(report.__title__) )
+          % plugins.htmlescape(report.__title__))
         if hasattr(report, '__doc__'):
             fp.write('     %s<br />\n' % plugins.htmlescape(report.__doc__))
         fp.write('    </li>\n')
     fp.write(
-      '   </ul>\n' )
+      '   </ul>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/anchors.py
==============================================================================
--- webcheck/plugins/anchors.py Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/anchors.py Thu Aug 18 23:22:26 2011        (r427)
@@ -45,5 +45,5 @@
                 anchor.parent.add_pageproblem(
                   u'bad link: %(url)s#%(anchor)s: unknown anchor'
                   % {'url': link.url,
-                     'anchor': anchor })
+                     'anchor': anchor})
     # FIXME: commit changes in session

Modified: webcheck/plugins/badlinks.py
==============================================================================
--- webcheck/plugins/badlinks.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/badlinks.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -42,26 +42,26 @@
         fp.write(
           '   <p class="description">\n'
           '    There were no problems retrieving links from the website.\n'
-          '   </p>\n' )
+          '   </p>\n')
         plugins.close_html(fp)
         return
     fp.write(
       '   <p class="description">\n'
       '    These links could not be retrieved during the crawling of the website.\n'
       '   </p>\n'
-      '   <ol>\n' )
+      '   <ol>\n')
     for link in links:
         # list the link
         fp.write(
           '    <li>\n'
           '     %(badurl)s\n'
           '     <ul class="problems">\n'
-          % { 'badurl':  plugins.make_link(link,link.url) })
+          % {'badurl':  plugins.make_link(link, link.url)})
         # list the problems
         for problem in link.linkproblems:
             fp.write(
               '      <li>%(problem)s</li>\n'
-              % { 'problem':  plugins.htmlescape(problem) })
+              % {'problem':  plugins.htmlescape(problem)})
         fp.write(
           '     </ul>\n')
         # present a list of parents
@@ -73,5 +73,5 @@
         fp.write(
           '    </li>\n')
     fp.write(
-      '   </ol>\n' )
+      '   </ol>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/external.py
==============================================================================
--- webcheck/plugins/external.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/external.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -42,7 +42,7 @@
         fp.write(
           '   <p class="description">'
           '    No external links were found on the website.'
-          '   </p>\n' )
+          '   </p>\n')
         plugins.close_html(fp)
         return
     fp.write(
@@ -50,16 +50,16 @@
       '    This is the list of all external urls encountered during the'
       '    examination of the website.'
       '   </p>\n'
-      '   <ol>\n' )
+      '   <ol>\n')
     for link in links:
         fp.write(
           '    <li>\n'
           '     %(link)s\n'
-          % { 'link':  plugins.make_link(link) })
+          % {'link': plugins.make_link(link)})
         # present a list of parents
         plugins.print_parents(fp, link, '     ')
         fp.write(
           '    </li>\n')
     fp.write(
-      '   </ol>\n' )
+      '   </ol>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/images.py
==============================================================================
--- webcheck/plugins/images.py  Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/images.py  Thu Aug 18 23:22:26 2011        (r427)
@@ -47,16 +47,16 @@
           '   <p class="description">\n'
           '    No images were linked on the website.\n'
           '   </p>\n'
-          '   <ol>\n' )
+          '   <ol>\n')
         plugins.close_html(fp)
         return
     fp.write(
       '   <p class="description">\n'
       '    This is the list of all images found linked on the website.\n'
       '   </p>\n'
-      '   <ol>\n' )
+      '   <ol>\n')
     for link in links:
         fp.write('    <li>%s</li>\n' % plugins.make_link(link, link.url))
     fp.write(
-      '   </ol>\n' )
+      '   </ol>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/new.py
==============================================================================
--- webcheck/plugins/new.py     Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/new.py     Thu Aug 18 23:22:26 2011        (r427)
@@ -37,6 +37,7 @@
 
 SECS_PER_DAY = 60 * 60 * 24
 
+
 def generate(site):
     """Output the list of recently modified pages to the specified file 
descriptor."""
     # the time for which links are considered new
@@ -51,7 +52,7 @@
           '   <p class="description">\n'
           '    No pages were found that were modified within the last %(new)d days.\n'
           '   </p>\n'
-          % { 'new': config.REPORT_WHATSNEW_URL_AGE })
+          % {'new': config.REPORT_WHATSNEW_URL_AGE})
         plugins.close_html(fp)
         return
     fp.write(
@@ -59,9 +60,9 @@
       '    These pages have been recently modified (within %(new)d days).\n'
       '   </p>\n'
       '   <ul>\n'
-      % { 'new': config.REPORT_WHATSNEW_URL_AGE })
+      % {'new': config.REPORT_WHATSNEW_URL_AGE})
     for link in links:
-        age = (time.time()-link.mtime)/SECS_PER_DAY
+        age = (time.time() - link.mtime) / SECS_PER_DAY
         fp.write(
           '    <li>\n'
           '     %(link)s\n'
@@ -69,7 +70,7 @@
           '      <li>age: %(age)d days</li>\n'
           '     </ul>\n'
           '    </li>\n'
-          % { 'link':  plugins.make_link(link),
-              'age':   age })
+          % {'link': plugins.make_link(link),
+             'age':  age})
     fp.write('   </ul>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/notchkd.py
==============================================================================
--- webcheck/plugins/notchkd.py Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/notchkd.py Thu Aug 18 23:22:26 2011        (r427)
@@ -42,7 +42,7 @@
         fp.write(
           '   <p class="description">\n'
           '    All links have been checked.\n'
-          '   </p>\n' )
+          '   </p>\n')
         plugins.close_html(fp)
         return
     fp.write(
@@ -55,11 +55,11 @@
         fp.write(
           '    <li>\n'
           '     %(link)s\n'
-          % { 'link': plugins.make_link(link, link.url) })
+          % {'link': plugins.make_link(link, link.url)})
         # present a list of parents
         plugins.print_parents(fp, link, '     ')
         fp.write(
           '    </li>\n')
     fp.write(
-      '   </ol>\n' )
+      '   </ol>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/notitles.py
==============================================================================
--- webcheck/plugins/notitles.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/notitles.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -40,14 +40,14 @@
     # get all internal pages without a title
     links = site.links.filter_by(is_page=True, is_internal=True)
     links = links.filter(or_(char_length(db.Link.title) == 0,
-                             db.Link.title ==None)).order_by(db.Link.url)
+                             db.Link.title == None)).order_by(db.Link.url)
     # present results
     fp = plugins.open_html(plugins.notitles, site)
     if not links:
         fp.write(
           '   <p class="description">\n'
           '    All pages had a title specified.\n'
-          '   </p>\n' )
+          '   </p>\n')
         plugins.close_html(fp)
         return
     fp.write(
@@ -59,8 +59,8 @@
     for link in links:
         fp.write(
           '    <li>%(link)s</li>\n'
-          % { 'link': plugins.make_link(link,link.url) })
+          % {'link': plugins.make_link(link, link.url)})
         link.add_pageproblem('missing title')
     fp.write(
-      '   </ol>\n' )
+      '   </ol>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/old.py
==============================================================================
--- webcheck/plugins/old.py     Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/old.py     Thu Aug 18 23:22:26 2011        (r427)
@@ -37,6 +37,7 @@
 
 SECS_PER_DAY = 60 * 60 * 24
 
+
 def generate(site):
     """Output the list of outdated pages to the specified file descriptor."""
     # the time for which links are considered old
@@ -51,7 +52,7 @@
           '   <p class="description">\n'
           '    No pages were found that were older than %(old)d days old.\n'
           '   </p>\n'
-          % { 'old': config.REPORT_WHATSOLD_URL_AGE })
+          % {'old': config.REPORT_WHATSOLD_URL_AGE})
         plugins.close_html(fp)
         return
     fp.write(
@@ -60,7 +61,7 @@
       '    days) and may be outdated.\n'
       '   </p>\n'
       '   <ul>\n'
-      % {'old': config.REPORT_WHATSOLD_URL_AGE })
+      % {'old': config.REPORT_WHATSOLD_URL_AGE})
     for link in links:
         age = (time.time() - link.mtime) / SECS_PER_DAY
         fp.write(
@@ -70,10 +71,10 @@
           '      <li>age: %(age)d days</li>\n'
           '     </ul>\n'
           '    </li>\n'
-          % { 'link':  plugins.make_link(link),
-              'age':   age })
+          % {'link': plugins.make_link(link),
+             'age':  age})
         # add link to problem database
         link.add_pageproblem('this page is %d days old' % age)
     fp.write(
-      '   </ul>\n' )
+      '   </ul>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/problems.py
==============================================================================
--- webcheck/plugins/problems.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/problems.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -41,12 +41,13 @@
     name = name.lower()
     import re
     # strip any leading non alpha characters
-    name = re.sub('^[^a-z]*','',name)
+    name = re.sub('^[^a-z]*', '', name)
     # remove any non-allowed characters
-    name = re.sub('[^a-z0-9_:.]+','-',name)
+    name = re.sub('[^a-z0-9_:.]+', '-', name)
     # we're done
     return name
 
+
 def generate(site):
     """Output the overview of problems to the given file descriptor."""
     # make a list of problems per author
@@ -61,7 +62,7 @@
         else:
             author = unicode('Unknown')
         # store the problem
-        if problem_db.has_key(author):
+        if author in problem_db:
             problem_db[author].append(link)
         else:
             problem_db[author] = [link]
@@ -70,7 +71,7 @@
         fp.write(
           '   <p class="description">\n'
           '    No problems were found on this site, hurray.\n'
-          '   </p>\n' )
+          '   </p>\n')
         plugins.close_html(fp)
         return
     # print description
@@ -78,7 +79,7 @@
       '   <p class="description">\n'
       '    This is an overview of all the problems on the site, grouped by\n'
       '    author.\n'
-      '   </p>\n' )
+      '   </p>\n')
     # get a list of authors
     authors = problem_db.keys()
     authors.sort()
@@ -88,8 +89,8 @@
         for author in authors:
             fp.write(
              '    <li><a href="#author_%(authorref)s">Author: %(author)s</a></li>\n'
-              % { 'authorref': plugins.htmlescape(_mk_id(author)),
-                  'author':    plugins.htmlescape(author) })
+              % {'authorref': plugins.htmlescape(_mk_id(author)),
+                 'author':    plugins.htmlescape(author)})
         fp.write('   </ul>\n')
     # generate problem report
     fp.write('   <ul>\n')
@@ -98,8 +99,8 @@
           '     <li id="author_%(authorref)s">\n'
           '      Author: %(author)s\n'
           '      <ul>\n'
-          % { 'authorref': plugins.htmlescape(_mk_id(author)),
-              'author':    plugins.htmlescape(author) })
+          % {'authorref': plugins.htmlescape(_mk_id(author)),
+             'author':    plugins.htmlescape(author)})
         # sort pages by url
         problem_db[author].sort(lambda a, b: cmp(a.url, b.url))
         # list problems for this author
@@ -109,19 +110,19 @@
               '    <li>\n'
               '     %(link)s\n'
               '     <ul class="problems">\n'
-              % { 'link':    plugins.make_link(link) })
+              % {'link': plugins.make_link(link)})
             # list the problems
             for problem in link.pageproblems.order_by(db.PageProblem.message):
                 fp.write(
                   '      <li>%(problem)s</li>\n'
-                  % { 'problem':  plugins.htmlescape(problem) })
+                  % {'problem':  plugins.htmlescape(problem)})
             # end the list item
             fp.write(
               '     </ul>\n'
-              '    </li>\n' )
+              '    </li>\n')
         fp.write(
           '      </ul>\n'
-          '     </li>\n' )
+          '     </li>\n')
     fp.write(
-      '   </ul>\n' )
+      '   </ul>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/sitemap.py
==============================================================================
--- webcheck/plugins/sitemap.py Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/sitemap.py Thu Aug 18 23:22:26 2011        (r427)
@@ -52,6 +52,7 @@
            (embed.depth == None or embed.depth > link.depth):
             add_pagechildren(embed, children, explored)
 
+
 def _explore(fp, link, explored, depth=0, indent='    '):
     """Recursively do a breadth first traversal of the graph of links on the
     site. Prints the html results to the file descriptor."""
@@ -76,6 +77,7 @@
             fp.write(indent + ' </ul>\n')
     fp.write(indent + '</li>\n')
 
+
 def generate(site):
     """Output the sitemap to the specified file descriptor."""
     fp = plugins.open_html(plugins.sitemap, site)
@@ -84,10 +86,10 @@
       '   <p class="description">\n'
       '    This an overview of the crawled site.\n'
       '   </p>\n'
-      '   <ul>\n' )
+      '   <ul>\n')
     explored = set(x.id for x in site.bases)
     for l in site.bases:
         _explore(fp, l, explored)
     fp.write(
-      '   </ul>\n' )
+      '   </ul>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/size.py
==============================================================================
--- webcheck/plugins/size.py    Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/size.py    Thu Aug 18 23:22:26 2011        (r427)
@@ -53,12 +53,13 @@
         link.total_size = size
     return link.total_size
 
+
 def generate(site):
     """Output the list of large pages to the given file descriptor."""
     # get all internal pages and get big links
     links = site.links.filter_by(is_page=True, is_internal=True)
-    links = [ x for x in links
-              if _getsize(x) >= config.REPORT_SLOW_URL_SIZE * 1024 ]
+    links = [x for x in links
+             if _getsize(x) >= config.REPORT_SLOW_URL_SIZE * 1024]
     # sort links by size (biggest first)
     links.sort(lambda a, b: cmp(b.total_size, a.total_size))
     # present results
@@ -68,7 +69,7 @@
           '   <p class="description">\n'
           '    No pages over %(size)dK were found.\n'
           '   </p>\n'
-          % { 'size': config.REPORT_SLOW_URL_SIZE })
+          % {'size': config.REPORT_SLOW_URL_SIZE})
         plugins.close_html(fp)
         return
     fp.write(
@@ -77,7 +78,7 @@
       '    slow to download.\n'
       '   </p>\n'
       '   <ul>\n'
-      % { 'size': config.REPORT_SLOW_URL_SIZE })
+      % {'size': config.REPORT_SLOW_URL_SIZE})
     for link in links:
         size = plugins.get_size(link.total_size)
         fp.write(
@@ -87,11 +88,11 @@
           '      <li>size: %(size)s</li>\n'
           '     </ul>\n'
           '    </li>\n'
-          % { 'link': plugins.make_link(link),
-              'size': size })
+          % {'link': plugins.make_link(link),
+             'size': size})
         link.add_pageproblem(
           'this page and its components is %(size)s'
-          % { 'size': size })
+          % {'size': size})
     fp.write(
-      '   </ul>\n' )
+      '   </ul>\n')
     plugins.close_html(fp)

Modified: webcheck/plugins/urllist.py
==============================================================================
--- webcheck/plugins/urllist.py Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/plugins/urllist.py Thu Aug 18 23:22:26 2011        (r427)
@@ -39,10 +39,10 @@
       '    the website. It lists internal as well as external and\n'
       '    non-examined urls.\n'
       '   </p>\n'
-      '   <ol>\n' )
+      '   <ol>\n')
     links = site.links.order_by(db.Link.url)
     for link in links:
         fp.write('    <li>' + plugins.make_link(link, link.url) + '</li>\n')
     fp.write(
-      '   </ol>\n' )
+      '   </ol>\n')
     plugins.close_html(fp)

Modified: webcheck/webcheck.py
==============================================================================
--- webcheck/webcheck.py        Wed Aug 10 22:42:57 2011        (r426)
+++ webcheck/webcheck.py        Thu Aug 18 23:22:26 2011        (r427)
@@ -58,18 +58,21 @@
       'Albert Hopkins (marduk), Mike W. Meyer and Arthur de Jong.\n'
      'This is free software; see the source for copying conditions.  There is NO\n'
      'warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n'
-      % { 'version': __version__ })
+      % {'version': __version__})
+
 
 def print_usage():
     """Print short usage information."""
     sys.stderr.write(
       'Usage: webcheck [OPTION]... URL...\n')
 
+
 def print_tryhelp():
     """Print friendly pointer to more information."""
     sys.stderr.write(
       'Try \'webcheck --help\' for more information.\n')
 
+
 def print_help():
     """Print the option list."""
     sys.stdout.write(
@@ -96,7 +99,8 @@
       '  -w, --wait=SECONDS     wait SECONDS between retrievals\n'
       '  -V, --version          output version information and exit\n'
       '  -h, --help             display this help and exit\n'
-      % { 'redirects': config.REDIRECT_DEPTH } )
+      % {'redirects': config.REDIRECT_DEPTH})
+
 
 def parse_args(site):
     """Parse command-line arguments."""
@@ -151,7 +155,7 @@
             elif flag in ('-h', '--help'):
                 print_help()
                 sys.exit(0)
-        if len(args)==0 and not config.CONTINUE:
+        if len(args) == 0 and not config.CONTINUE:
             print_usage()
             print_tryhelp()
             sys.exit(1)
@@ -186,6 +190,7 @@
         sys.stderr.write('webcheck: %s\n' % str(e))
         sys.exit(1)
 
+
 def install_file(source, text=False):
     """Install the given file in the output directory.
     If the text flag is set to true it is assumed the file is text,
@@ -226,23 +231,24 @@
         sfp = open(source, mode)
     except IOError, (errno, strerror):
         debugio.error('%(fname)s: %(strerror)s' %
-                      { 'fname': source,
-                        'strerror': strerror })
+                      {'fname': source,
+                       'strerror': strerror})
         sys.exit(1)
     # create file in output directory (with overwrite question)
-    tfp = plugins.open_file(os.path.basename(source));
+    tfp = plugins.open_file(os.path.basename(source))
     # copy contents
     shutil.copyfileobj(sfp, tfp)
     # close files
     tfp.close()
     sfp.close()
 
+
 def main(site):
     """Main program."""
     # crawl through the website
     debugio.info('checking site....')
     crawler.setup_urllib2()
-    site.crawl() # this will take a while
+    site.crawl()  # this will take a while
     debugio.info('done.')
     # do postprocessing (building site structure, etc)
     debugio.info('postprocessing....')
@@ -259,6 +265,7 @@
     install_file('favicon.ico', False)
     debugio.info('done.')
 
+
 if __name__ == '__main__':
     try:
         # initialize site object