lists.arthurdejong.org
RSS feed

webcheck branch master updated. 1.10.4-66-g44fc843

[Date Prev][Date Next] [Thread Prev][Thread Next]

webcheck branch master updated. 1.10.4-66-g44fc843



This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "webcheck".

The branch, master has been updated
       via  44fc843ea803118aeffc4914f17414eaee040e0b (commit)
       via  7c4b1725490bbba1b53cf074869714daefa54024 (commit)
      from  24e191f42e45b408d1b34210dcedb710d201a669 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://arthurdejong.org/git/webcheck/commit/?id=44fc843ea803118aeffc4914f17414eaee040e0b

commit 44fc843ea803118aeffc4914f17414eaee040e0b
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Sun Sep 22 15:42:19 2013 +0200

    Properly write a UTF-8 encoded output file
    
    Write output using codecs.open() with the UTF-8 encoding. This also
    introduces a consistency improvement in argument naming.

diff --git a/webcheck/util.py b/webcheck/util.py
index 1851bce..911e3a2 100644
--- a/webcheck/util.py
+++ b/webcheck/util.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@
 # The files produced as output from the software do not automatically fall
 # under the copyright of the software, unless explicitly stated otherwise.
 
+import codecs
 import logging
 import os
 import shutil
@@ -35,7 +36,7 @@ from webcheck import config
 logger = logging.getLogger(__name__)
 
 
-def open_file(filename, istext=True, makebackup=False):
+def open_file(filename, is_text=True, makebackup=False):
     """This returns an open file object which can be used for writing. This
     file is created in the output directory. The output directory (stored in
     config.OUTPUT_DIR is created if it does not yet exist. If the second
@@ -64,19 +65,19 @@ def open_file(filename, istext=True, makebackup=False):
             elif res[0] != 'y':
                 raise SystemExit('Aborted.')
     # open the file for writing
-    if istext:
-        return open(fname, 'w')
+    if is_text:
+        return codecs.open(fname, encoding='utf-8', mode='w')
     else:
         return open(fname, 'wb')
 
 
-def install_file(source, text=False):
+def install_file(source, is_text=False):
     """Install the given file in the output directory.
-    If the text flag is set to true it is assumed the file is text,
+    If the is_text flag is set to true it is assumed the file is text,
     translating line endings."""
     # figure out mode to open the file with
     mode = 'r'
-    if text:
+    if is_text:
         mode += 'U'
     # check with what kind of argument we are called
     scheme = urlparse.urlsplit(source)[0]
@@ -104,7 +105,7 @@ def install_file(source, text=False):
     # open the input file
     sfp = open(source, mode)
     # create file in output directory (with overwrite question)
-    tfp = open_file(os.path.basename(source))
+    tfp = open_file(os.path.basename(source), is_text=is_text)
     # copy contents
     shutil.copyfileobj(sfp, tfp)
     # close files

http://arthurdejong.org/git/webcheck/commit/?id=7c4b1725490bbba1b53cf074869714daefa54024

commit 7c4b1725490bbba1b53cf074869714daefa54024
Author: Arthur de Jong <arthur@arthurdejong.org>
Date:   Sat Sep 21 17:46:57 2013 +0200

    Explicitly close database sessions
    
    This tries to close the session when the function is done with it to
    avoid using too much memory.

diff --git a/webcheck/crawler.py b/webcheck/crawler.py
index 749485a..0c1a2da 100644
--- a/webcheck/crawler.py
+++ b/webcheck/crawler.py
@@ -337,6 +337,7 @@ class Crawler(object):
             logger.debug('items left to check: %d' %
                           (remaining + len(tocheck)))
         session.commit()
+        session.close()
 
     def fetch(self, link):
         """Attempt to fetch the url (if not yanked) and fill in link
@@ -451,6 +452,7 @@ class Crawler(object):
             if hasattr(plugin, 'postprocess'):
                 logger.info(plugin.__name__)
                 plugin.postprocess(self)
+        #session.close() do not close because bases uses the session
 
     def generate(self):
         """Generate pages for plugins."""
diff --git a/webcheck/plugins/about.py b/webcheck/plugins/about.py
index 25a2c62..014f1b7 100644
--- a/webcheck/plugins/about.py
+++ b/webcheck/plugins/about.py
@@ -111,3 +111,4 @@ def generate(crawler):
     fp.write(
       '   </ul>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/anchors.py b/webcheck/plugins/anchors.py
index 917ce73..64af983 100644
--- a/webcheck/plugins/anchors.py
+++ b/webcheck/plugins/anchors.py
@@ -1,7 +1,7 @@
 
 # anchors.py - plugin check for missing anchors
 #
-# Copyright (C) 2006, 2007, 2011 Arthur de Jong
+# Copyright (C) 2006, 2007, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -49,3 +49,4 @@ def postprocess(crawler):
                      'anchor': anchor})
     # commit changes in session
     session.commit()
+    session.close()
diff --git a/webcheck/plugins/badlinks.py b/webcheck/plugins/badlinks.py
index 899005a..716f1f2 100644
--- a/webcheck/plugins/badlinks.py
+++ b/webcheck/plugins/badlinks.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2007, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2007, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -46,6 +46,7 @@ def postprocess(crawler):
             for parent in link.parents:
                 parent.add_pageproblem('bad link: %s: %s' % (link.url, problem))
     session.commit()
+    session.close()
 
 
 def generate(crawler):
@@ -88,3 +89,4 @@ def generate(crawler):
     fp.write(
       '   </ol>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/external.py b/webcheck/plugins/external.py
index 34073e3..9a7681f 100644
--- a/webcheck/plugins/external.py
+++ b/webcheck/plugins/external.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2009, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2009, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -66,3 +66,4 @@ def generate(crawler):
     fp.write(
       '   </ol>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/images.py b/webcheck/plugins/images.py
index df3f53d..05c9369 100644
--- a/webcheck/plugins/images.py
+++ b/webcheck/plugins/images.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -60,3 +60,4 @@ def generate(crawler):
     fp.write(
       '   </ol>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/new.py b/webcheck/plugins/new.py
index fbd043d..96392e4 100644
--- a/webcheck/plugins/new.py
+++ b/webcheck/plugins/new.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -75,3 +75,4 @@ def generate(crawler):
              'age':  age})
     fp.write('   </ul>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/notchkd.py b/webcheck/plugins/notchkd.py
index bbb9fdd..eecf025 100644
--- a/webcheck/plugins/notchkd.py
+++ b/webcheck/plugins/notchkd.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -66,3 +66,4 @@ def generate(crawler):
     fp.write(
       '   </ol>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/notitles.py b/webcheck/plugins/notitles.py
index 2a335f5..605619a 100644
--- a/webcheck/plugins/notitles.py
+++ b/webcheck/plugins/notitles.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -44,6 +44,7 @@ def postprocess(crawler):
     for link in links:
         link.add_pageproblem('missing title')
     session.commit()
+    session.close()
 
 
 def generate(crawler):
@@ -75,3 +76,4 @@ def generate(crawler):
     fp.write(
       '   </ol>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/old.py b/webcheck/plugins/old.py
index e507104..e061248 100644
--- a/webcheck/plugins/old.py
+++ b/webcheck/plugins/old.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -77,3 +77,4 @@ def generate(crawler):
     fp.write(
       '   </ul>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/problems.py b/webcheck/plugins/problems.py
index 5a8ed5e..19f71d2 100644
--- a/webcheck/plugins/problems.py
+++ b/webcheck/plugins/problems.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2007, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2007, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -125,3 +125,4 @@ def generate(crawler):
     fp.write(
       '   </ul>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/size.py b/webcheck/plugins/size.py
index c318b29..2d5b570 100644
--- a/webcheck/plugins/size.py
+++ b/webcheck/plugins/size.py
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 1998, 1999 Albert Hopkins (marduk)
 # Copyright (C) 2002 Mike W. Meyer
-# Copyright (C) 2005, 2006, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -93,3 +93,4 @@ def generate(crawler):
     fp.write(
       '   </ul>\n')
     webcheck.plugins.close_html(fp)
+    session.close()
diff --git a/webcheck/plugins/urllist.py b/webcheck/plugins/urllist.py
index b8e4b22..d3ae8cf 100644
--- a/webcheck/plugins/urllist.py
+++ b/webcheck/plugins/urllist.py
@@ -1,7 +1,7 @@
 
 # urllist.py - plugin to generate a list of visited urls
 #
-# Copyright (C) 2005, 2006, 2011 Arthur de Jong
+# Copyright (C) 2005, 2006, 2011, 2013 Arthur de Jong
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -47,3 +47,4 @@ def generate(crawler):
     fp.write(
       '   </ol>\n')
     webcheck.plugins.close_html(fp)
+    session.close()

-----------------------------------------------------------------------

Summary of changes:
 webcheck/crawler.py          |    2 ++
 webcheck/plugins/about.py    |    1 +
 webcheck/plugins/anchors.py  |    3 ++-
 webcheck/plugins/badlinks.py |    4 +++-
 webcheck/plugins/external.py |    3 ++-
 webcheck/plugins/images.py   |    3 ++-
 webcheck/plugins/new.py      |    3 ++-
 webcheck/plugins/notchkd.py  |    3 ++-
 webcheck/plugins/notitles.py |    4 +++-
 webcheck/plugins/old.py      |    3 ++-
 webcheck/plugins/problems.py |    3 ++-
 webcheck/plugins/size.py     |    3 ++-
 webcheck/plugins/urllist.py  |    3 ++-
 webcheck/util.py             |   17 +++++++++--------
 14 files changed, 36 insertions(+), 19 deletions(-)


hooks/post-receive
-- 
webcheck
-- 
To unsubscribe send an email to
webcheck-commits-unsubscribe@lists.arthurdejong.org or see
http://lists.arthurdejong.org/webcheck-commits/