blue mojo [hyung-joon kim]
Quick Search!
자신의 RSS 리더기에 Feeds 추가하기
#! /home/bin/python
# -*- coding: utf-8 -*-
# DO NOT REMOVE THE ENCODING DECLARATION ABOVE

######################################################################
#  BMFeedsForOne.cgi v0.1
#  ------------------------------------------------------------------
#  To check for updates, visit http://www.ibluemojon.com
#  To contact me, send an email to khj1218@gmail.com
#  ------------------------------------------------------------------
#  Feel free to use, modify, redistribute, or do anything else with
#  this script, but PLEASE LET ME KNOW if you find a bug, make an
#  improvement of your own, or have any comment or suggestion.
######################################################################

import cgi, time
#import cgitb
#cgitb.enable()
from urllib import urlopen
from xml.dom import minidom

# Shared state filled in while the site feed is being generated.

# Maps a sort key derived from an item's <pubDate> (built in handleItems)
# to that item's index in ITEM_LIST.
PUBDATE_IDX_MAP = {}
# Settings read from the "feed.info" config file as key -> value strings.
INFO_DICT = {}
# Every <item> DOM element collected from the source feeds.
ITEM_LIST = []
# Running count of collected items; also the index given to the next item.
IDX = 0

def getFeedInfo(path="feed.info", info=None):
    """ Load "key=value" settings from the feed config file.

        Lines whose first character is '#' are comments, and lines without
        an '=' are ignored. Every other line is stripped and split on the
        first '=' only, so a value may itself contain '='.

        path -- config file to read (default: "feed.info", resolved
                against the current working directory).
        info -- dictionary to fill; when omitted, the module-level
                INFO_DICT is updated.

        Returns None. A missing or unreadable file is not an error: the
        function returns quietly and keeps whatever entries were read
        before the failure. """
    if info is None:
        info = INFO_DICT
    try:
        f = open(path)
        # try/finally (rather than "with") keeps this runnable on the old
        # Python 2 interpreters this script targets.
        try:
            for line in f:
                if line.startswith('#'):
                    continue
                fields = line.strip().split('=', 1)
                if len(fields) == 2:
                    info[fields[0]] = fields[1]
        finally:
            f.close()
    except (IOError, OSError):
        # Best effort: only I/O failures are ignored, so programming
        # errors are no longer hidden the way a bare "except:" hid them.
        return

def genSiteFeed():
    """ Build and print one combined site feed from several source feeds.

        Reads the configuration with getFeedInfo(), then treats every
        setting whose key contains the substring 'feed' as the URL of an
        XML feed. Each feed is downloaded and parsed, and its <item>
        elements are handed to handleItems(). Finally the header, the
        items in pubDate order and the footer are printed.

        Returns None. Download and XML parsing errors are not caught here
        and propagate to the caller. """
    getFeedInfo()
    for key in INFO_DICT.keys():
        if 'feed' not in key:
            continue
        source = urlopen(INFO_DICT[key])
        try:
            xml_feed = minidom.parse(source)
        finally:
            # The parsed DOM is independent of the connection, so release
            # the handle as soon as parsing is over.
            source.close()
        # Only the <item> elements are needed from each source feed.
        handleItems(xml_feed.getElementsByTagName("item"))

    # Every collected item must own a distinct entry in PUBDATE_IDX_MAP;
    # a smaller map means some items overwrote each other's key.
    if len(PUBDATE_IDX_MAP) == IDX:
        printSiteFeedHeader()
        sortItemsByPubDate()
        printSiteFeedFooter()
    else:
        print("Duplicate <pubDate> omitted when used as key in PUBDATE_IDX_MAP")
        print("Error in generating Site Feed")

def handleItems(items):
    """ Collect the <item> elements of one feed into the shared item list.

        items -- iterable of <item> DOM elements from a parsed feed.

        For each item the text of its first <pubDate> child is converted
        to an integer timestamp. The item is appended to ITEM_LIST and
        registered in PUBDATE_IDX_MAP under the key (timestamp, index),
        where index is the item's position in ITEM_LIST. Sorting the map's
        keys therefore orders items by publication time.

        Items without a <pubDate> element are skipped. Returns None; a
        date that normalizeTime() cannot parse raises its exception. """
    global IDX
    for item in items:
        pubDateElements = item.getElementsByTagName("pubDate")
        if not pubDateElements:
            # <pubDate> is optional in RSS 2.0, and an undated item cannot
            # be placed in date order, so it is left out.
            continue
        pubDate = getText(pubDateElements[0].childNodes)
        # Parse before touching any shared state, so a bad date cannot
        # leave ITEM_LIST and IDX out of step.
        timestamp = int(time.mktime(normalizeTime(pubDate)))
        ITEM_LIST.append(item)
        # The index is part of the key so that two items published in the
        # same second get distinct entries instead of overwriting each
        # other; tuples still sort by timestamp first.
        PUBDATE_IDX_MAP[(timestamp, IDX)] = IDX
        IDX += 1   # NOTE: Upon success, IDX should be the same as len(PUBDATE_IDX_MAP)

def getText(nodelist):
    """ Return the concatenated data of the text nodes in nodelist.

        nodelist -- iterable of DOM nodes, e.g. an element's childNodes.

        Only nodes whose type is TEXT_NODE contribute; all other nodes,
        including nested elements, are ignored. An empty string is
        returned when there is no text node. """
    return "".join(child.data
                   for child in nodelist
                   if child.nodeType == child.TEXT_NODE)

def normalizeTime(pubDate):
    """ Convert the text of a <pubDate> into a local-time struct_time.

        pubDate -- date string in the RFC 822 style used by RSS, e.g.
                   "Mon, 01 Jan 2007 09:00:00 +0900" or "... GMT".

        The zone given in the string (numeric offset or a standard name
        such as GMT) is honoured, so time.mktime() on the result yields
        the real instant and dates from feeds in different time zones
        compare correctly. A date with no recognisable zone is taken to
        be local time.

        Returns a time.struct_time expressed in local time.
        Raises ValueError when the string cannot be parsed as a date. """
    # Imported here because the module only imports "time" at the top.
    from email.utils import mktime_tz, parsedate_tz

    parsed = parsedate_tz(pubDate)
    if parsed is None:
        raise ValueError("unrecognized <pubDate> format: %r" % (pubDate,))
    # mktime_tz() gives seconds since the epoch with the zone offset
    # applied; converting back to local time keeps the struct_time return
    # type that callers pass on to time.mktime().
    return time.localtime(mktime_tz(parsed))

def sortItemsByPubDate():
    """ Print every collected item as XML, in descending key order.

        The keys of PUBDATE_IDX_MAP are sorted from largest to smallest.
        For each key the matching element of ITEM_LIST is serialized with
        toxml("utf-8") and printed. Returns None. """
    for stamp in sorted(PUBDATE_IDX_MAP, reverse=True):
        entry = ITEM_LIST[PUBDATE_IDX_MAP[stamp]]
        print(entry.toxml("utf-8"))

def printSiteFeedHeader():
    """ Print the XML declaration and the opening part of the RSS channel.

        The title, link and description come from the 'title',
        'homepage_url' and 'desc' settings in INFO_DICT; a missing
        setting raises KeyError. Every setting whose key contains 'xmlns'
        is written as an attribute on the <rss> element. The values are
        inserted as-is, without XML escaping. Returns None. """
    # Attributes for the <rss> tag, each followed by a single space.
    xmlns = ""
    for name in INFO_DICT:
        if 'xmlns' in name:
            xmlns += '%s="%s" ' % (name, INFO_DICT[name])

    # UTC shifted by 32400 seconds (9 hours), matching the fixed "+0900"
    # suffix written into <lastBuildDate> below.
    shifted = time.gmtime(time.time() + 32400)
    buildDate = time.strftime("%a, %d %b %Y %H:%M:%S", shifted)

    template = (
        '<?xml version="1.0" encoding="utf-8" ?>\n'
        "<rss %sversion='2.0'><channel>\n"
        "<title>%s</title>\n"
        "<link>%s</link>\n"
        "<description>%s</description>\n"
        "<language>ko</language>\n"
        "<generator>BMUnifyFeeds 0.2</generator>\n"
        "<lastBuildDate>%s +0900</lastBuildDate>\n"
        "<docs>http://blogs.law.harvard.edu/tech/rss</docs>\n"
    )
    print(template % (xmlns,
                      INFO_DICT['title'],
                      INFO_DICT['homepage_url'],
                      INFO_DICT['desc'],
                      buildDate))

def printSiteFeedFooter():
    """ Print the tags that close the <channel> and <rss> elements. """
    closing_tags = "</channel></rss>"
    print(closing_tags)


##############################
#   Script entry point
##############################
# CGI response header. print adds its own newline after the "\n" in the
# literal, which produces the blank line that ends the header section.
print("Content-Type: text/xml\n")
# Everything printed from here on is the body of the response.
genSiteFeed()