webcheck commit: r463 - webcheck/webcheck
[Date Prev][Date Next]
[Thread Prev][Thread Next]
webcheck commit: r463 - webcheck/webcheck
- From: Commits of the webcheck project <webcheck-commits [at] lists.arthurdejong.org>
- To: webcheck-commits [at] lists.arthurdejong.org
- Reply-to: webcheck-users [at] lists.arthurdejong.org
- Subject: webcheck commit: r463 - webcheck/webcheck
- Date: Wed, 16 Nov 2011 12:18:21 +0100 (CET)
Author: devin
Date: Wed Nov 16 12:18:19 2011
New Revision: 463
URL: http://arthurdejong.org/viewvc/webcheck?revision=463&view=revision
Log:
detect self-referencing redirects even with intermediate pages
Modified:
webcheck/webcheck/db.py
Modified: webcheck/webcheck/db.py
==============================================================================
--- webcheck/webcheck/db.py Wed Nov 16 12:17:29 2011 (r462)
+++ webcheck/webcheck/db.py Wed Nov 16 12:18:19 2011 (r463)
@@ -96,7 +96,7 @@
@staticmethod
def clean_url(url):
- # normalise the URL, removing the fragment from the URL
+ """normalise the URL, removing the fragment from the URL"""
return urlparse.urldefrag(normalizeurl(url))[0]
def _get_link(self, url):
@@ -108,7 +108,7 @@
# try to find the link
instance = session.query(Link).filter_by(url=url).first()
if not instance:
- if config.MAX_DEPTH and self.depth >= config.MAX_DEPTH:
+ if config.MAX_DEPTH != None and self.depth >= config.MAX_DEPTH:
logger.debug('link %s too deep', url)
instance = Link(url=url, depth=self.depth + 1)
session.add(instance)
@@ -135,18 +135,21 @@
def add_redirect(self, url):
"""Indicate that this link redirects to the specified url."""
+ session = object_session(self)
url = self.clean_url(url)
- # figure out depth
+ # check for (possibly indirect) redirects to self
+ for link in session.query(Link).filter_by(url=url):
+ if link.follow_link() == self:
+ link.add_linkproblem('redirects back to source: %s' % self.url)
+ self.add_linkproblem('redirects back to source: %s' % link.url)
+ return
+ # figure out depth (how can [self.redirectdepth] ever be non-zero?)
self.redirectdepth = max([self.redirectdepth] +
[x.redirectdepth for x in self.parents]) + 1
# check depth
if self.redirectdepth >= config.REDIRECT_DEPTH:
self.add_linkproblem('too many redirects (%d)' %
self.redirectdepth)
return
- # check for redirect to self
- if url == self.url:
- self.add_linkproblem('redirect same as source: %s' % url)
- return
# add child
self.add_child(url)
--
To unsubscribe send an email to
webcheck-commits-unsubscribe@lists.arthurdejong.org or see
http://lists.arthurdejong.org/webcheck-commits/
- webcheck commit: r463 - webcheck/webcheck,
Commits of the webcheck project