#! /home/bin/python
# -*- coding: utf-8 -*-
# DO NOT REMOVE THE ENCODING DECLARATION ABOVE
######################################################################
# BMFeedsForOne.cgi v0.1
# ------------------------------------------------------------------
# To check update, visit http://www.ibluemojon.com
# To contact me, email to khj1218_at_gmail.com
# ------------------------------------------------------------------
# Feel free to use, modify, spread it out, or do anything with this
# script, but PLEASE LET ME KNOW if you find any bug, make a better
# change on your own, or have a comment, suggestion, or whatsoever.
######################################################################
#
# Overview: reads "feed.info" (key=value lines), downloads every feed
# listed there, collects their <item> elements and writes them to
# stdout, newest first, wrapped in a single RSS channel.
#
# The code below runs unchanged on Python 2.6+ and Python 3.

import email.utils
import io
import sys
import time
from contextlib import closing
from xml.dom import minidom

try:
    import cgi  # not used directly; kept for the cgitb toggle below
except ImportError:  # the cgi module was removed in Python 3.13
    cgi = None
#import cgitb
#cgitb.enable()

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Maps a pubDate (seconds since the epoch, UTC) to the list of ITEM_LIST
# indices published at that instant, in the order they were collected.
PUBDATE_IDX_MAP = {}
# key -> value pairs read from feed.info
INFO_DICT = {}
# Every collected <item> element, in collection order
ITEM_LIST = []
# Number of items collected so far (== len(ITEM_LIST))
IDX = 0


def _emit(data):
    """ Write `data` followed by a newline to stdout as UTF-8 bytes.

    All output goes through this one function so that text (header,
    footer) and the already-encoded bytes returned by toxml("utf-8")
    end up on the same stream, on both Python 2 and Python 3.
    """
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts byte strings.
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    stream.write(data + b"\n")


def getFeedInfo():
    """ Retrieve feed information from the config file - feed.info

    Every line of the form "key=value" that does not start with '#' is
    stored in INFO_DICT. The line is split on the first '=' only, so
    values may themselves contain '='. Lines without '=' are ignored.
    A missing or unreadable file is not an error: INFO_DICT is simply
    left as it is.
    """
    try:
        with io.open("feed.info", encoding="utf-8", errors="replace") as f:
            for line in f:
                if line.startswith('#'):
                    continue
                pair = line.strip().split('=', 1)
                if len(pair) == 2:
                    INFO_DICT[pair[0]] = pair[1]
    except IOError:
        # Best effort, as before: run with whatever has been read so far.
        return


def genSiteFeed():
    """ Generate a convergent site feed by extracting only <item>
    elements from multiple feeds and sorting them in pub-date order.

    Every INFO_DICT key containing 'feed' is treated as a feed URL.
    Download or parse errors are not caught here and propagate to the
    caller.
    """
    getFeedInfo()
    for key in sorted(INFO_DICT):
        # Namespace declarations such as "xmlns:feedburner" also contain
        # 'feed' but are not feed URLs.
        if 'feed' not in key or 'xmlns' in key:
            continue
        # Parse several XML feeds to combine into a single site feed
        with closing(urlopen(INFO_DICT[key])) as response:
            xml_feed = minidom.parse(response)
        # Extract only elements that are mostly needed
        handleItems(xml_feed.getElementsByTagName("item"))

    printSiteFeedHeader()
    sortItemsByPubDate()
    printSiteFeedFooter()


def _pubDateTimestamp(pubDate):
    """ Convert a pubDate string to integer seconds since the epoch.

    RFC 822 dates (the format RSS uses) are converted with their UTC
    offset applied, so items from feeds in different time zones sort
    correctly against each other. Anything the RFC 822 parser rejects
    falls back to normalizeTime(), interpreted as local time.
    Raises ValueError or IndexError when the text cannot be parsed.
    """
    pubDate = pubDate.strip()
    parsed = email.utils.parsedate_tz(pubDate)
    if parsed is not None:
        return int(email.utils.mktime_tz(parsed))
    return int(time.mktime(normalizeTime(pubDate)))


def handleItems(items):
    """ Extract items from a xml feed and insert them to a single item
    list. Also, build the [pubDate : item_index] map in order to sort
    all the collected items in pubDate-order later on.

    `items` is an iterable of DOM <item> elements. Items that have no
    <pubDate> child, or whose pubDate cannot be parsed, are skipped
    because they cannot be placed in the ordering.
    """
    global IDX
    for item in items:
        try:
            pubDateElement = item.getElementsByTagName("pubDate")[0]
            timestamp = _pubDateTimestamp(getText(pubDateElement.childNodes))
        except (IndexError, ValueError, OverflowError):
            continue
        ITEM_LIST.append(item)
        # Several items may share one timestamp, so keep a list per key
        # instead of letting a later item overwrite an earlier one.
        PUBDATE_IDX_MAP.setdefault(timestamp, []).append(IDX)
        IDX += 1


def getText(nodelist):
    """ Retrieve text data of a node in XML element

    Returns the concatenated data of the TEXT_NODE entries in
    `nodelist`; nodes of any other type are ignored.
    """
    return "".join(node.data for node in nodelist
                   if node.nodeType == node.TEXT_NODE)


def normalizeTime(pubDate):
    """ Normalize the different time formats of <pubDate>

    Returns a time.struct_time. The zone is first matched as a name
    (%Z); if that fails, the last whitespace-separated token of
    `pubDate` (e.g. "+0900") is matched literally. In both cases the
    zone does not alter the returned fields.
    Raises ValueError if neither form matches and IndexError if
    `pubDate` is empty.
    """
    try:
        return time.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z")
    except ValueError:
        # '%' is doubled so the token is always taken literally.
        zone = pubDate.split()[-1].replace("%", "%%")
        return time.strptime(pubDate, "%a, %d %b %Y %H:%M:%S " + zone)


def sortItemsByPubDate():
    """ Sort the items collected into a single item list in
    pubDate-order. Then, print the result in xml document.

    Newest items come first; items sharing a timestamp keep the order
    in which they were collected. Returns the list of items in the
    order they were written.
    """
    ordered = []
    for timestamp in sorted(PUBDATE_IDX_MAP, reverse=True):
        for index in PUBDATE_IDX_MAP[timestamp]:
            ordered.append(ITEM_LIST[index])
    for item in ordered:
        _emit(item.toxml("utf-8"))
    return ordered


def printSiteFeedHeader():
    """ Print the XML declaration and the opening part of the channel.

    Title, link and description come from the 'title', 'homepage_url'
    and 'desc' entries of INFO_DICT (empty when absent). Every entry
    whose key contains 'xmlns' is added as an attribute of the root
    element. Values are inserted verbatim, so they must already be
    XML-safe.
    """
    # Build date shifted by 32400 s (9 h) to match the "+0900" suffix.
    buildDate = time.strftime("%a, %d %b %Y %H:%M:%S",
                              time.gmtime(time.time() + 32400))
    xmlns = "".join('%s="%s" ' % (key, INFO_DICT[key])
                    for key in sorted(INFO_DICT) if 'xmlns' in key)
    # NOTE(review): the element tags were missing from these strings (the
    # script emitted bare values and never used `xmlns`), which is not
    # well-formed XML. They are written out here as a standard RSS 2.0
    # channel - confirm the element choice (e.g. lastBuildDate) against
    # the deployed feed.
    lines = [
        '<?xml version="1.0" encoding="utf-8"?>',
        '<rss version="2.0" %s>' % xmlns,
        '<channel>',
        '<title>%s</title>' % INFO_DICT.get('title', ''),
        '<link>%s</link>' % INFO_DICT.get('homepage_url', ''),
        '<description>%s</description>' % INFO_DICT.get('desc', ''),
        '<language>ko</language>',
        '<generator>BMUnifyFeeds 0.2</generator>',
        '<lastBuildDate>%s +0900</lastBuildDate>' % buildDate,
        '<docs>http://blogs.law.harvard.edu/tech/rss</docs>',
    ]
    _emit("\n".join(lines))


def printSiteFeedFooter():
    """ Close the elements opened by printSiteFeedHeader()."""
    _emit("</channel>\n</rss>")


##############################
# Main call
##############################
if __name__ == "__main__":
    # The embedded newline plus the one _emit() adds give the blank line
    # that ends the CGI response headers.
    _emit("Content-Type: text/xml\n")
    genSiteFeed()