import codecs
import sys
import getopt
+import re
from time import strptime, strftime
# Create an event node (special case because for some reason there are two
# 'event' elements in the pydump output, which is probably LJ's fault)
event = inDoc.getElementsByTagName("event")[0]
- appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
+ eventText = getNodeText(event, "event")
+
+ appendTextNode(outDoc, entry, "event", replaceLJTags(eventText))
security = getNodeText(inDoc, "security")
getNodeText(comment, "subject"))
# Create an event element
- appendTextNode(outDoc, outComment, "event",
- getNodeText(comment, "body"))
+ bodyText = getNodeText(comment, "body")
+ appendTextNode(outDoc, outComment, "event", replaceLJTags(bodyText))
# Create the author element
author = outDoc.createElement("author")
if(parentId != ""):
appendTextNode(outDoc, outComment, "parent_itemid", parentId)
+
# Regular expressions used in replaceLJTags().
# (global for later reuse - suggestion by jparise)

userRE = re.compile(r'<lj user="(.*?)" ?/?>', re.IGNORECASE)
commRE = re.compile(r'<lj comm="(.*?)" ?/?>', re.IGNORECASE)
namedCutRE = re.compile(r'<lj-cut +text="(.*?)" ?/?>',
                        re.IGNORECASE | re.DOTALL)
cutRE = re.compile(r'<lj-cut>', re.IGNORECASE)
# The closing tag needs its own name: binding it to cutRE as well would
# overwrite the opening-tag pattern and leave '<lj-cut>' unconverted.
endCutRE = re.compile(r'</lj-cut>', re.IGNORECASE)
embedRE = re.compile(r'<lj-embed id="[0-9]+">', re.IGNORECASE)


def replaceLJTags(entry):
    """Return *entry* with LiveJournal-specific tags converted.

    Conversions performed, all case-insensitive:

    * ``<lj user="NAME">`` becomes a link to the user's journal with
      class ``lj-user``.
    * ``<lj comm="NAME">`` becomes a link to the community with class
      ``lj-comm``.
    * ``<lj-cut text="TEXT">`` becomes ``<!--more TEXT-->``.
    * ``<lj-cut>`` becomes ``<!--more-->``.
    * ``</lj-cut>`` is removed.

    ``<lj-embed>`` tags are left untouched (see the note in the body).

    entry -- the entry or comment text to convert (a string)
    """
    rv = entry

    # replace lj user tags
    rv = userRE.sub(
        r'<a href="http://www.livejournal.com/users/\1" class="lj-user">\1</a>',
        rv)

    # replace lj comm tags
    rv = commRE.sub(
        r'<a href="http://community.livejournal.com/\1/" class="lj-comm">\1</a>',
        rv)

    # replace lj-cut tags; the named form goes first so its text is kept,
    # then the bare opening tag, and finally the closing tag is dropped
    rv = namedCutRE.sub(r'<!--more \1-->', rv)
    rv = cutRE.sub('<!--more-->', rv)
    rv = endCutRE.sub('', rv)

    # replace lj-embed tags
    # this doesn't actually work. LJ doesn't include the embedded content
    # when ljdump calls 'getevents', but instead includes an lj-embed tag
    # with an id and nothing else.
    #rv = re.sub(embedRE, '', rv)

    return rv
+
+
def usage():
print( "Usage: convertdump.py [arguments]" )
print( """