lists.arthurdejong.org
RSS feed

webcheck commit: r438 - webcheck/webcheck

[Date Prev][Date Next] [Thread Prev][Thread Next]

webcheck commit: r438 - webcheck/webcheck



Author: arthur
Date: Fri Sep 16 22:05:32 2011
New Revision: 438
URL: http://arthurdejong.org/viewvc/webcheck?revision=438&view=revision

Log:
pass the IO timeout to urllib2

Modified:
   webcheck/webcheck/crawler.py

Modified: webcheck/webcheck/crawler.py
==============================================================================
--- webcheck/webcheck/crawler.py        Fri Sep 16 21:45:50 2011        (r437)
+++ webcheck/webcheck/crawler.py        Fri Sep 16 22:05:32 2011        (r438)
@@ -34,9 +34,7 @@
 import os
 import re
 import robotparser
-import socket
 import time
-import urllib
 import urllib2
 import urlparse
 
@@ -48,6 +46,7 @@
 
 
 class RedirectError(urllib2.HTTPError):
+
     def __init__(self, url, code, msg, hdrs, fp, newurl):
         self.newurl = newurl
         urllib2.HTTPError.__init__(self, url, code, msg, hdrs, fp)
@@ -306,7 +305,7 @@
             parent = link.parents.first()
             if parent:
                 request.add_header('Referer', parent.url)
-            response = urllib2.urlopen(request)
+            response = urllib2.urlopen(request, timeout=webcheck.config.IOTIMEOUT)
             link.mimetype = response.info().gettype()
             link.set_encoding(response.headers.getparam('charset'))
             # FIXME: get result code and other stuff
-- 
To unsubscribe send an email to
webcheck-commits-unsubscribe@lists.arthurdejong.org or see
http://lists.arthurdejong.org/webcheck-commits/