Converting a Directory of reStructuredText Files to an Atom Feed

In one of the previous posts I sketched out how to generate an Atom feed using Python and Kid. Here I show a complete script that converts a directory of reStructuredText files into an Atom feed -- great for writing a blog in reStructuredText.

The Python Script

Here is the entire script:

#!/usr/bin/env python
#
# Bojan Nikolic <bojan@bnikolic.co.uk>
# Process RST documents into an atom feed
#
#
# Need to consider whole directory at time
#

import time
import os
import stat
import sys

from StringIO import StringIO
from docutils.core import publish_file
from xml.etree import ElementTree
from xml.etree.ElementTree import XML
import kid

# List of files that do not go into the atom feed.  BlogFileP compares
# against the bare names produced by os.listdir, so entries here must be
# file names without any directory part.
_exlist = ["index.txt",
           "blog.txt"]

# Kid template that Atomise hands to Kiddify to render the feed, and the
# file the rendered feed is written to.  Both are relative paths, so they
# are resolved against the current working directory.
_template = "static/bnfeed.kid"
_output   = "build/blog/blog.atom"


def ModTimeISO8601(fname):
    """Format the last-modification time of *fname* as an ISO 8601 string.

    The timestamp is rendered in UTC with a trailing ``Z``, for example
    ``2001-09-09T01:46:40Z``.

    :param fname: path of the file to inspect
    :return: modification time as ``YYYY-MM-DDTHH:MM:SSZ``
    """
    # Indexing the stat result (rather than reading .st_mtime) yields whole
    # seconds.
    mtime_seconds = os.stat(fname)[stat.ST_MTIME]
    utc_fields = time.gmtime(mtime_seconds)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)


def RestToXML(rest_file):
    """Convert a reStructuredText file to an XML element tree.

    The file is run through the docutils ``xml`` writer into an in-memory
    buffer, and the buffer contents are then parsed with ElementTree.

    :param rest_file: path of the reStructuredText file to convert
    :return: root element of the parsed docutils XML output
    """
    xml_buffer = StringIO()
    # Turn close() into a no-op so the buffer is still readable once
    # publish_file returns (presumably docutils closes its destination
    # after writing -- confirm against the installed docutils version).
    xml_buffer.close = lambda: None
    # The context manager guarantees the source file is closed even if
    # publish_file raises part-way through.
    with open(rest_file) as source:
        # doctitle_xform is switched off; FileInfo looks for the title
        # inside the first child of the document.
        publish_file(source,
                     writer_name='xml',
                     destination=xml_buffer,
                     settings_overrides={"doctitle_xform": 0})
    return XML(xml_buffer.getvalue())


def BlogFileP(fname):
    """Decide whether the file *fname* belongs in the blog.

    A file qualifies when its extension is exactly ``.txt`` and its name
    is not listed in ``_exlist``.

    :param fname: file name to test
    :return: True if the file should be included, False otherwise
    """
    extension = os.path.splitext(fname)[1]
    if extension != ".txt":
        return False
    return fname not in _exlist


def BlogFiles(fnamelist):
    """Return the list of file names that should go into the blog.

    :param fnamelist: iterable of file names, each tested with BlogFileP
    :return: new list holding the accepted names in their original order
    """
    # A list comprehension always produces a real list, as the docstring
    # promises; filter() returns a lazy iterator on Python 3.
    return [fname for fname in fnamelist if BlogFileP(fname)]

def FileInfo(fname):
    """Collect the feed entry data for the blog post stored in fname

    :return: tuple of (file name without directory or extension,
             title text, first paragraph serialised as XML,
             modification time as an ISO 8601 string)
    """
    document = RestToXML(fname)
    # The title and paragraph are looked up as direct children of the
    # document's first child element (presumably the top-level section).
    section = document[0]
    stem = os.path.splitext(os.path.basename(fname))[0]
    title = section.find("title").text
    first_paragraph = ElementTree.tostring(section.find("paragraph"))
    modified = ModTimeISO8601(fname)
    return (stem, title, first_paragraph, modified)

def Kiddify(kid_fname,
            blist,
            fnameout,
            **kwargs):
    """Render a Kid template and write the result to a file as XML.

    :param kid_fname: path of the Kid template file
    :param blist: blog entries, made available to the template as ``blist``
    :param fnameout: path of the output file; overwritten if it exists
    :param kwargs: further keyword arguments forwarded to the template
    """
    template = kid.Template(file=kid_fname,
                            blist=blist,
                            **kwargs)
    # Serialise before opening the output so that a template error does not
    # leave a truncated file behind.
    serialized = template.serialize(output="xml")
    # The context manager flushes and closes the file deterministically.
    with open(fnameout, "w") as fout:
        fout.write(serialized)

def Atomise(dirnamein):
    """Build the atom feed from the blog files found in dirnamein

    Every file accepted by BlogFiles is converted with FileInfo, and the
    resulting entries are rendered through the ``_template`` Kid template
    into ``_output``.  The template also receives ``mostrecent``, the
    latest modification time among the entries.
    """
    candidates = BlogFiles(os.listdir(dirnamein))
    entries = []
    for name in candidates:
        entries.append(FileInfo(os.path.join(dirnamein, name)))
    # Index 3 of each entry is its ISO 8601 modification time; the format
    # is fixed-width, so comparing the strings orders them by time.
    newest = max(entry[3] for entry in entries)
    Kiddify(_template, entries, _output, mostrecent=newest)


if __name__ == '__main__':
    # Exactly one argument is expected: the directory to turn into a feed.
    # Report a usage message instead of letting a wrong argument count
    # surface as a TypeError from Atomise.
    if len(sys.argv) != 2:
        sys.exit("usage: %s DIRECTORY" % sys.argv[0])
    Atomise(sys.argv[1])

The script takes only one argument, the name of the directory that you wish to turn into the feed. It then finds all .txt files except index.txt and blog.txt and converts them from restructured text to XML.

From the XML the script extracts the title and the first paragraph of each file, from which it finally constructs the Atom feed. The last modification time of each text file is used to date the corresponding entry in the feed. Simple, but quite effective.