import codecs
import sys
import getopt
+import re
from time import strptime, strftime
# Create an event node (special case because for some reason there are two
# 'event' elements in the pydump output, which is probably LJ's fault)
event = inDoc.getElementsByTagName("event")[0]
- appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
+ eventText = getNodeText(event, "event")
+
+ appendTextNode(outDoc, entry, "event", replaceLJTags(eventText))
security = getNodeText(inDoc, "security")
getNodeText(comment, "subject"))
# Create an event element
- appendTextNode(outDoc, outComment, "event",
- getNodeText(comment, "body"))
+ bodyText = getNodeText(comment, "body")
+ appendTextNode(outDoc, outComment, "event", replaceLJTags(bodyText))
# Create the author element
author = outDoc.createElement("author")
if(parentId != ""):
appendTextNode(outDoc, outComment, "parent_itemid", parentId)
+def replaceLJTags(entry):
+    """Return *entry* with LiveJournal-specific tags rewritten as plain HTML.
+
+    The following substitutions are made, all case-insensitively:
+
+    * ``<lj user="NAME">``   -> link to ``http://NAME.livejournal.com/``
+    * ``<lj comm="NAME">``   -> link to
+      ``http://community.livejournal.com/NAME/``
+    * ``<lj-cut text="T">``  -> ``<!--more T-->``
+    * ``<lj-cut>``           -> ``<!--more-->``
+    * ``</lj-cut>``          -> removed
+
+    Opening tags may be self-closed (``/>``) and may carry further
+    attributes after the one that is read; those extra attributes are
+    dropped.  Text that contains none of these tags is returned unchanged.
+
+    entry -- the entry or comment body, as a string
+    """
+    # (pattern, replacement) pairs, applied in this order.  The named
+    # lj-cut rule has to run before the plain one.
+    #
+    # The captured value is matched with [^"...]* rather than a lazy .*? so
+    # it always stops at the attribute's own closing quote; with .*? a tag
+    # such as <lj user="bob" title="Bob"> captured 'bob" title="Bob' and
+    # wrote it into the generated link.  [^>]* then consumes whatever is
+    # left of the tag (optional space, '/', or additional attributes).
+    rules = (
+        (r'<lj user="([^"\n]*)"[^>]*>',
+         r'<a href="http://\1.livejournal.com/" class="lj-user">\1</a>'),
+        (r'<lj comm="([^"\n]*)"[^>]*>',
+         r'<a href="http://community.livejournal.com/\1/" class="lj-comm">\1</a>'),
+        # The cut text may span several lines, so newlines are allowed here.
+        (r'<lj-cut\s+text="([^"]*)"[^>]*>', r'<!--more \1-->'),
+        (r'<lj-cut\s*/?>', '<!--more-->'),
+        (r'</lj-cut\s*>', ''),
+    )
+
+    rv = entry
+    for pattern, replacement in rules:
+        rv = re.compile(pattern, re.IGNORECASE).sub(replacement, rv)
+
+    return rv
+
+
def usage():
print( "Usage: convertdump.py [arguments]" )
print( """