import os
import codecs
import sys
+import getopt
+import re
from time import strptime, strftime
# Create an event node (special case because for some reason there are two
# 'event' elements in the pydump output, which is probably LJ's fault)
event = inDoc.getElementsByTagName("event")[0]
- appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
+ eventText = getNodeText(event, "event")
+
+ appendTextNode(outDoc, entry, "event", replaceLJTags(eventText))
security = getNodeText(inDoc, "security")
getNodeText(comment, "subject"))
# Create an event element
- appendTextNode(outDoc, outComment, "event",
- getNodeText(comment, "body"))
+ bodyText = getNodeText(comment, "body")
+ appendTextNode(outDoc, outComment, "event", replaceLJTags(bodyText))
# Create the author element
author = outDoc.createElement("author")
if(parentId != ""):
appendTextNode(outDoc, outComment, "parent_itemid", parentId)
def replaceLJTags(entry):
    """Convert LiveJournal-specific markup in *entry* to plain HTML.

    The following rewrites are applied, in this order, matching tag names
    case-insensitively:

    * ``<lj user="NAME">``      -> ``<a href="http://NAME.livejournal.com/"
      class="lj-user">NAME</a>``
    * ``<lj comm="NAME">``      -> ``<a
      href="http://community.livejournal.com/NAME/" class="lj-comm">NAME</a>``
    * ``<lj-cut text="TEXT">``  -> ``<!--more TEXT-->``
    * ``<lj-cut>``              -> ``<!--more-->``
    * ``</lj-cut>``             -> removed

    The user/comm/named-cut tags may optionally be self-closed (``/>``) and
    may have a single space before the closing bracket.  Tags that do not
    fit these shapes (extra attributes, extra whitespace, ...) are left
    untouched.

    Args:
        entry: text of an entry or comment body.

    Returns:
        A new string with the recognised LJ tags replaced.
    """
    # Attribute values are captured with a negated character class rather
    # than a lazy ``.*?``.  A lazy dot is free to run past the closing quote
    # when the remainder of the tag does not match, which made a malformed
    # or extended tag swallow everything up to the next ``">`` in the text.
    #
    # user/comm names stay on one line (the original patterns did not use
    # DOTALL); the cut text may contain newlines (the original used DOTALL).
    substitutions = (
        (r'<lj user="([^"\n]*)" ?/?>',
         r'<a href="http://\1.livejournal.com/" class="lj-user">\1</a>'),
        (r'<lj comm="([^"\n]*)" ?/?>',
         r'<a href="http://community.livejournal.com/\1/" class="lj-comm">\1</a>'),
        (r'<lj-cut +text="([^"]*)" ?/?>', r'<!--more \1-->'),
        (r'<lj-cut>', r'<!--more-->'),
        (r'</lj-cut>', r''),
    )

    rv = entry
    for pattern, replacement in substitutions:
        rv = re.compile(pattern, re.IGNORECASE).sub(replacement, rv)
    return rv
+
+
def usage():
    """Print the command-line help for convertdump.py to standard output."""
    help_lines = [
        "Usage: convertdump.py [arguments]",
        "",
        "This will convert a pydump archive into something compatible with the",
        "WordPress LiveJournal importer. This is the same format used by the Windows",
        "ljArchive exporter.",
        "",
        "Arguments:",
        " -u --user username of archive to process [required]",
        " -l --limit limit the number of entries in each xml file (default 250)",
        " -i --insecure include private and protected entries in the output",
        " -h --help show this help page",
        "",
        "Example:",
        " ./convertdump.py --user stevemartin --limit 200 --insecure",
        "",
    ]
    # The trailing empty element plus print's own newline leave one blank
    # line after the example.
    print("\n".join(help_lines))
+
+
def main(argv):
username = ""
entryLimit = 250
includeSecure = False;
-
- if( len(argv) < 2 ):
- print( "Usage: convertdump.py <username> <entrylimit>" )
- return
- else:
- username = argv[0]
- entryLimit = int(argv[1])
- try:
- includeSecure = bool(argv[2])
- except IndexError:
- includeSecure = False
+ if( len(argv) == 0 ):
+ usage()
+ sys.exit(2)
- if(includeSecure == True):
- print( "Warning: Including secure entries in XML output" )
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "hu:l:i", ["help",
+ "user=",
+ "limit=",
+ "insecure"])
+ except getopt.GetoptError, err:
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(2)
+
+ for o, a in opts:
+ if o == "-v":
+ verbose = True
+ elif o in ("-u", "--user"):
+ username = a
+ elif o in ("-l", "--limit"):
+ entryLimit = int(a)
+ elif o in ("-i", "--insecure"):
+ print( "Warning: Including secure entries in XML output" )
+ includeSecure = True
+ elif o in ("-h", "--help"):
+ usage()
+ sys.exit()
+ else:
+ assert False, "unhandled option"
userDir = os.listdir(username)