webcheck commit: r426 - in webcheck: . plugins
[Date Prev][Date Next]
[Thread Prev][Thread Next]
webcheck commit: r426 - in webcheck: . plugins
- From: Commits of the webcheck project <webcheck-commits [at] lists.arthurdejong.org>
- To: webcheck-commits [at] lists.arthurdejong.org
- Reply-to: webcheck-users [at] lists.arthurdejong.org
- Subject: webcheck commit: r426 - in webcheck: . plugins
- Date: Wed, 10 Aug 2011 22:43:01 +0200 (CEST)
Author: arthur
Date: Wed Aug 10 22:42:57 2011
New Revision: 426
URL: http://arthurdejong.org/viewvc/webcheck?view=rev&revision=426
Log:
re-enable the anchors plugin
Modified:
webcheck/config.py
webcheck/db.py
webcheck/myurllib.py
webcheck/plugins/anchors.py
Modified: webcheck/config.py
==============================================================================
--- webcheck/config.py Wed Aug 10 22:41:24 2011 (r425)
+++ webcheck/config.py Wed Aug 10 22:42:57 2011 (r426)
@@ -64,7 +64,7 @@
REDIRECT_DEPTH = 5
# The list of plugins that will be used to generate the report.
-PLUGINS = [ #'anchors',
+PLUGINS = [ 'anchors',
'sitemap',
'urllist',
'images',
Modified: webcheck/db.py
==============================================================================
--- webcheck/db.py Wed Aug 10 22:41:24 2011 (r425)
+++ webcheck/db.py Wed Aug 10 22:42:57 2011 (r426)
@@ -172,31 +172,23 @@
def add_anchor(self, anchor):
"""Indicate that this page contains the specified anchor."""
- return # FIXME: implement/update
# lowercase anchor
anchor = anchor.lower()
- # add anchor
- if anchor in self.anchors:
+ if self.anchors.filter(Anchor.anchor == anchor).first():
self.add_pageproblem(
'anchor/id "%(anchor)s" defined multiple times'
% { 'anchor': anchor })
else:
- self.anchors.add(anchor)
+ self.anchors.append(Anchor(anchor=anchor))
def add_reqanchor(self, parent, anchor):
"""Indicate that the specified link contains a reference to the
specified anchor. This can be checked later."""
- return # FIXME: implement/update
# lowercase anchor
anchor = anchor.lower()
- # convert the url to a link object if we were called with a url
- parent = self.__tolink(parent)
- # add anchor
- if anchor in self.reqanchors:
- if parent not in self.reqanchors[anchor]:
- self.reqanchors[anchor].add(parent)
- else:
- self.reqanchors[anchor] = set([parent])
+ # if RequestedAnchor doesn't exist, add it
+ if not self.reqanchors.filter((RequestedAnchor.parent_id == parent.id) & (RequestedAnchor.anchor == anchor)).first():
+ self.reqanchors.append(RequestedAnchor(parent_id=parent.id, anchor=anchor))
def follow_link(self, visited=None):
"""If this link represents a redirect return the redirect target,
@@ -254,3 +246,38 @@
def __unicode__(self):
return self.message
+
+
+class Anchor(Base):
+ """The named anchors (IDs) found on the page."""
+
+ __tablename__ = 'anchors'
+
+ id = Column(Integer, primary_key=True)
+ link_id = Column(Integer, ForeignKey('links.id', ondelete='CASCADE'))
+ link = relationship(Link, backref=backref('anchors',
+ lazy='dynamic',
+ cascade='all,delete,delete-orphan'))
+ anchor = Column(String)
+
+ def __unicode__(self):
+ return self.anchor
+
+
+class RequestedAnchor(Base):
+ """The named anchors (IDs) found on the page."""
+
+ __tablename__ = 'reqanchors'
+
+ id = Column(Integer, primary_key=True)
+ link_id = Column(Integer, ForeignKey('links.id', ondelete='CASCADE'))
+ link = relationship(Link, backref=backref('reqanchors',
+ lazy='dynamic',
+ cascade='all,delete,delete-orphan',
+ ), primaryjoin='Link.id == RequestedAnchor.link_id')
+ parent_id = Column(Integer, ForeignKey('links.id', ondelete='CASCADE'))
+ parent = relationship(Link, primaryjoin='Link.id == RequestedAnchor.parent_id')
+ anchor = Column(String)
+
+ def __unicode__(self):
+ return self.anchor
Modified: webcheck/myurllib.py
==============================================================================
--- webcheck/myurllib.py Wed Aug 10 22:41:24 2011 (r425)
+++ webcheck/myurllib.py Wed Aug 10 22:42:57 2011 (r426)
@@ -1,7 +1,7 @@
# myurllib.py - general purpose URL handling library
#
-# Copyright (C) 2007 Arthur de Jong
+# Copyright (C) 2007, 2011 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -83,7 +83,7 @@
# make escaping consistent
url = _normalize_escapes(url)
# split the url in useful parts
- (scheme, netloc, path, query) = urlparse.urlsplit(url)[:4]
+ (scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
# remove any leading /../ parts
if scheme in ( 'http', 'https' ):
path = _leadingdotpattern.sub('', path)
@@ -108,8 +108,8 @@
# get rid of double slashes in some paths
if ( scheme == 'file' ):
path = _doubleslashpattern.sub('/', path)
- # put the url back together again (discarding fragment)
- return urlparse.urlunsplit((scheme, netloc, path, query, ''))
+ # put the url back together again
+ return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def normalizeurl(url):
"""Return a normalized URL."""
Modified: webcheck/plugins/anchors.py
==============================================================================
--- webcheck/plugins/anchors.py Wed Aug 10 22:41:24 2011 (r425)
+++ webcheck/plugins/anchors.py Wed Aug 10 22:42:57 2011 (r426)
@@ -1,7 +1,7 @@
# anchors.py - plugin check for missing anchors
#
-# Copyright (C) 2006, 2007 Arthur de Jong
+# Copyright (C) 2006, 2007, 2011 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -27,21 +27,23 @@
__title__ = 'missing anchors'
__author__ = 'Arthur de Jong'
+from sqlalchemy.orm.session import object_session
+
+import db
+
+
def generate(site):
"""Present the list of bad links to the given file descriptor."""
- # find all links with requested anchors
- links = [ x
- for x in site.linkMap.values()
- if len(x.reqanchors)>0 and x.isfetched ]
+ # find all fetched links with requested anchors
+ links = site.links.filter(db.Link.reqanchors.any()).filter(db.Link.fetched != None)
# go over list and find missing anchors
for link in links:
- # check all requested anchors
+ # check that all requested anchors exist
for anchor in link.reqanchors:
- # if the anchor is there there is no prolem
- if anchor in link.anchors:
- continue
- # report problem
- for parent in link.reqanchors[anchor]:
- parent.add_pageproblem(
- 'reference to undefined anchor/id "%(anchor)s"'
- % { 'anchor': anchor })
+ # if the anchor is not there there, report problem
+ if not link.anchors.filter(db.Anchor.anchor == anchor.anchor).first():
+ anchor.parent.add_pageproblem(
+ u'bad link: %(url)s#%(anchor)s: unknown anchor'
+ % {'url': link.url,
+ 'anchor': anchor })
+ # FIXME: commit changes in session
--
To unsubscribe send an email to
webcheck-commits-unsubscribe@lists.arthurdejong.org or see
http://lists.arthurdejong.org/webcheck-commits
- webcheck commit: r426 - in webcheck: . plugins, Commits of the webcheck project