RSS and Wiki Headline News Reader
svn: r12252
This commit is contained in:
parent
c6633a559e
commit
c7127fe162
@ -26,7 +26,19 @@
|
||||
import re
|
||||
import gobject
|
||||
import urllib
|
||||
from xml.dom import minidom, Node
|
||||
# FIXME For Python 3:
|
||||
# Change:
|
||||
# import urllib
|
||||
# To:
|
||||
# import urllib.request
|
||||
# Change:
|
||||
# url_info = urllib.urlopen(URL)
|
||||
# To:
|
||||
# url_info = urllib.request.urlopen(URL)
|
||||
import sys
|
||||
from htmlentitydefs import name2codepoint as n2cp
|
||||
import re
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
#
|
||||
@ -37,6 +49,26 @@ from DataViews import register, Gramplet
|
||||
from const import URL_WIKISTRING
|
||||
from TransUtils import sgettext as _
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
#
|
||||
# Local functions
|
||||
#
|
||||
#------------------------------------------------------------------------
|
||||
def substitute(match):
    """
    Return the replacement text for one HTML character reference.

    `match` is a regular-expression match whose group(1) is "#" for a
    numeric reference (empty otherwise) and whose group(2) is the body
    of the reference: decimal digits, "x" followed by hex digits, or an
    entity name.  A reference that cannot be decoded is returned
    unchanged, so malformed input never raises.
    """
    ent = match.group(2)
    if match.group(1) == "#":
        try:
            if ent[:1] in ("x", "X"):
                # Hexadecimal reference such as &#x41;
                return unichr(int(ent[1:], 16))
            return unichr(int(ent))
        except (ValueError, OverflowError):
            # Body is not a number (e.g. "&#foo;") or the code point is
            # outside the range unichr accepts: leave the text alone.
            return match.group()
    cp = n2cp.get(ent)
    if cp:
        return unichr(cp)
    # Unknown entity name: keep the original text.
    return match.group()
|
||||
|
||||
def decode_html(string):
    """
    Return `string` with HTML character references replaced.

    Every "&name;" or "&#digits;" style reference found in `string` is
    handed to substitute(); whatever substitute() returns is spliced
    into the result in place of the reference.
    """
    # Raw string: "\d" and "\w" are regex escapes, not string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
    return entity_re.sub(substitute, string)
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
#
|
||||
# Gramplet class
|
||||
@ -53,8 +85,21 @@ class HeadlineNewsGramplet(Gramplet):
|
||||
"""
|
||||
Initialize gramplet. Start up update timer.
|
||||
"""
|
||||
self.set_tooltip(_("Read headline news from the GRAMPS wiki"))
|
||||
self.limit = 5
|
||||
# Description, Type, URL, Pretty URL for User
|
||||
self.feeds = [
|
||||
("GRAMPS Wiki Headline News", "wiki", (self.RAW % "HeadlineNews"), (self.URL % "HeadlineNews")),
|
||||
("GRAMPS Blog Comments", "rss", "http://blog.gramps-project.org/?feed=comments-rss", None),
|
||||
("GRAMPS Blog Posts", "rss", "http://blog.gramps-project.org/?feed=rss", None),
|
||||
("GRAMPS Wiki Changes", "rss", "http://www.gramps-project.org/wiki/index.php?title=Special:RecentChanges&feed=rss", None),
|
||||
("GRAMPS Bugtracker Issues", "rss", "http://www.gramps-project.org/bugs/issues_rss.php?key=ece7d21451d76337acf776c9a4384773", None),
|
||||
("GRAMPS SVN Commits", "rss", "http://cia.vc/stats/project/Gramps/.rss", None),
|
||||
]
|
||||
self.set_tooltip(_("Read GRAMPS headline news"))
|
||||
self.update_interval = 3600 * 1000 # in milliseconds (1 hour)
|
||||
self.set_use_markup(True)
|
||||
self.set_wrap(False)
|
||||
self.set_text(_("No Family Tree loaded."))
|
||||
self.timer = gobject.timeout_add(self.update_interval,
|
||||
self.update_by_timer)
|
||||
|
||||
@ -66,22 +111,118 @@ class HeadlineNewsGramplet(Gramplet):
|
||||
return True # keep updating!
|
||||
|
||||
def main(self):
|
||||
continuation = self.process('HeadlineNews')
|
||||
retval = True
|
||||
while retval:
|
||||
retval, text = continuation.next()
|
||||
self.set_text(text)
|
||||
yield True
|
||||
self.cleanup(text)
|
||||
yield False
|
||||
self.set_text("Loading GRAMPS Headline News...\n")
|
||||
fresh = True
|
||||
yield True
|
||||
for (feed_description, feed_type, feed_url, pretty_url) in self.feeds:
|
||||
fp = urllib.urlopen(feed_url)
|
||||
if feed_type == "wiki":
|
||||
text = fp.read()
|
||||
if fresh:
|
||||
self.clear_text()
|
||||
fresh = False
|
||||
self.render_text("""<u><b>%s</b></u> [<a href="%s">wiki</a>]\n""" % (feed_description, pretty_url))
|
||||
self.render_text(self.decode_wiki(text).strip())
|
||||
self.append_text("\n")
|
||||
yield True
|
||||
elif feed_type == "rss":
|
||||
try:
|
||||
xmldoc = minidom.parse(fp)
|
||||
except Exception, e:
|
||||
print "Headline News Gramplet Error: RSS parse failed on '%s': %s" % (feed_description, e)
|
||||
continue
|
||||
if fresh:
|
||||
self.clear_text()
|
||||
fresh = False
|
||||
self.render_text("""<u><b>%s</b></u> [<a href="%s">RSS</a>]\n""" % (feed_description, feed_url))
|
||||
yield True
|
||||
rootNode = xmldoc.documentElement
|
||||
for node in rootNode.childNodes:
|
||||
#print "> ", node.nodeName
|
||||
if (node.nodeName == "channel"):
|
||||
count = 1
|
||||
for node2 in node.childNodes:
|
||||
if count > 5: break
|
||||
if (node2.nodeName == "item"):
|
||||
title = ""
|
||||
link = ""
|
||||
desc = ""
|
||||
# Gather up the data:
|
||||
for item_node in node2.childNodes:
|
||||
#print "---> ", item_node.nodeName
|
||||
if (item_node.nodeName == "title"):
|
||||
for text_node in item_node.childNodes:
|
||||
if (text_node.nodeType == node.TEXT_NODE):
|
||||
title += text_node.nodeValue
|
||||
elif (item_node.nodeName == "link"):
|
||||
for text_node in item_node.childNodes:
|
||||
if (text_node.nodeType == node.TEXT_NODE):
|
||||
link += text_node.nodeValue
|
||||
elif (item_node.nodeName == "description"):
|
||||
for text_node in item_node.childNodes:
|
||||
if (text_node.nodeType == node.TEXT_NODE):
|
||||
desc += text_node.nodeValue
|
||||
if title:
|
||||
if link:
|
||||
self.render_text(" %d. " % count)
|
||||
self.link(title, "URL", link, tooltip=link)
|
||||
else:
|
||||
self.render_text(" %d. %s" % (count, title))
|
||||
self.append_text(" - ")
|
||||
self.append_text(self.first_line(desc))
|
||||
self.append_text("\n")
|
||||
count += 1
|
||||
yield True
|
||||
self.append_text("\n")
|
||||
self.append_text("", scroll_to="begin")
|
||||
|
||||
def cleanup(self, text):
|
||||
def first_line(self, text):
    """
    Return a short one-line teaser for `text`.

    HTML tags are stripped and character references decoded.  The
    first line of what remains, cut to at most 30 characters, is
    returned with "..." appended.
    """
    text = decode_html(self.strip_html(text))
    # Drop leading whitespace first: text that starts with a line break
    # would otherwise produce an empty first line and a bare "...".
    line = text.lstrip().split("\n")[0]
    return line[:30] + "..."
|
||||
|
||||
def strip_html(self, text):
    """
    Return `text` with HTML tags removed and repeated whitespace squeezed.

    Everything from a "<" up to and including the next ">" is dropped.
    Outside of tags, a tab, space or newline that repeats the previously
    kept character is skipped, so "a  b" becomes "a b".  Non-breaking
    space entities are turned into ordinary spaces first.
    """
    # Replace the whole entity: replacing only "nbsp;" would leave the
    # "&" behind.  The bare form is still replaced, as it was before.
    text = text.replace("&nbsp;", " ").replace("nbsp;", " ")
    kept = []
    last_c = None
    in_tag = False
    for c in text:
        if c == "<":
            in_tag = True
        if not in_tag:
            if c in ("\t", " ", "\n") and c == last_c:
                continue
            kept.append(c)
            last_c = c
        if c == ">":
            # The closing bracket itself is still part of the tag.
            in_tag = False
    return "".join(kept)
|
||||
|
||||
def decode_wiki(self, text):
|
||||
# final text
|
||||
text = text.replace("<BR>", "\n")
|
||||
while "\n\n\n" in text:
|
||||
text = text.replace("\n\n\n", "\n\n")
|
||||
text = text.strip()
|
||||
## Wiki text:
|
||||
## Templates:
|
||||
pattern = '{{.*?}}'
|
||||
matches = re.findall(pattern, text)
|
||||
for match in matches:
|
||||
page = match[2:-2]
|
||||
oldtext = match
|
||||
if "|" in page:
|
||||
template, heading, body = page.split("|", 2)
|
||||
if template.lower() == "release":
|
||||
newtext = "GRAMPS " + heading + " released.\n\n"
|
||||
else:
|
||||
#newtext = "<B>%s</B>\n\n" % heading
|
||||
newtext = ""
|
||||
text = text.replace(oldtext, newtext)
|
||||
### Internal wiki URL with title:
|
||||
pattern = re.compile('\[\[(.*?)\|(.*?)\]\]')
|
||||
matches = pattern.findall(text)
|
||||
@ -117,16 +258,7 @@ class HeadlineNewsGramplet(Gramplet):
|
||||
matches = pattern.findall(text)
|
||||
for match in matches:
|
||||
text = text.replace("'''%s'''" % match, "<B>%s</B>" % match)
|
||||
text = """<I>Live update from <A HREF="http://gramps-project.org/">www.gramps-project.org</A></I>:\n\n""" + text
|
||||
self.clear_text()
|
||||
self.set_use_markup(True)
|
||||
try:
|
||||
self.render_text(text)
|
||||
except:
|
||||
cla, exc, trbk = sys.exc_info()
|
||||
self.append_text(_("Error") + (" : %s %s\n\n" %(cla, exc)))
|
||||
self.append_text(text)
|
||||
self.append_text("", scroll_to="begin")
|
||||
return text
|
||||
|
||||
def wiki(self, title):
    """
    Return self.URL with `title` substituted into it.
    """
    url = self.URL % title
    return url
|
||||
@ -134,55 +266,6 @@ class HeadlineNewsGramplet(Gramplet):
|
||||
def nice_title(self, title):
    """
    Return `title` with every underscore turned into a space.
    """
    words = title.split("_")
    return " ".join(words)
|
||||
|
||||
def process(self, title):
    """
    Fetch the raw wiki page `title` and expand it, as a generator.

    Yields (more, text) tuples.  While work is in progress `more` is
    True and `text` is a progress message or an intermediate result;
    the last tuple is (False, text) holding the expanded page.

    Steps applied to the page text:
      - every match of the pattern <.*?> is removed, then each newline
        becomes "<BR>"
      - {{template|heading|body}} becomes a heading line followed by
        the body ("release" templates get a "GRAMPS ... released."
        heading)
      - {{page}} is replaced by the processed page "Template:page"
      - #REDIRECT [[page]] is replaced by the processed target page
    """
    title = self.nice_title(title)
    yield True, (_("Reading") + " '%s'..." % title)
    fp = urllib.urlopen(self.RAW % title)
    try:
        text = fp.read()
    finally:
        # Release the connection even when the read fails.
        fp.close()
    for exp in re.findall('<.*?>', text):
        text = text.replace(exp, "")
    text = text.replace("\n", "<BR>")
    for match in re.findall('{{.*?}}', text):
        page = match[2:-2]
        if "|" in page:
            # A template may carry only a heading ({{name|heading}});
            # pad with an empty body so the unpacking cannot fail.
            parts = page.split("|", 2) + [""]
            template, heading, body = parts[:3]
            if template.lower() == "release":
                newtext = "GRAMPS " + heading + " released.<BR><BR>"
            else:
                newtext = "<B>%s</B><BR><BR>" % heading
            newtext += body + "<BR>"
            text = text.replace(match, newtext)
        else: # a macro/redirect
            # Pass the nested page's progress on to the caller; after
            # the loop `newtext` holds the nested page's final text.
            for more, newtext in self.process("Template:" + page):
                yield True, newtext
            text = text.replace(match, newtext)
    for match in re.findall(r'#REDIRECT \[\[.*?\]\]', text):
        # Strip the leading "#REDIRECT [[" (12 characters) and the "]]".
        page = match[12:-2]
        for more, newtext in self.process(page):
            yield True, newtext
        text = text.replace(match, newtext)
    yield False, text
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
#
|
||||
# Register Gramplet
|
||||
|
Loading…
x
Reference in New Issue
Block a user