#!/usr/bin/python
+# Copyright 2009, Sean M. Graham (www.sean-graham.com)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
import xml.dom.minidom
import os
import codecs
+import sys
+import getopt
+import re
+
from time import strptime, strftime
def getNodeText(doc, nodename):
parent.appendChild(element)
-def addEntryForId(outDoc, username, id):
+def addEntryForId(outDoc, element, username, id, includeSecure):
entryFile = open("%s/L-%s" % (username,id), "r")
inDoc = xml.dom.minidom.parse(entryFile)
# Create an entry element
entry = outDoc.createElement("entry")
- ljElement.appendChild(entry)
# Create an itemid element
appendTextNode(outDoc, entry, "itemid", getNodeText(inDoc,"itemid"))
# Create an event node (special case because for some reason there are two
# 'event' elements in the pydump output, which is probably LJ's fault)
event = inDoc.getElementsByTagName("event")[0]
- appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
+ eventText = getNodeText(event, "event")
+
+ appendTextNode(outDoc, entry, "event", replaceLJTags(eventText))
+
+ security = getNodeText(inDoc, "security")
+
+ if(security != ""):
+ # don't append this entry unless the user provided the argument
+ if(includeSecure == False):
+ print("omitting secure entry: L-%s" % id)
+ return
+ else:
+ if(security == "usemask"):
+ print("including allowmask entry: L-%s" % id)
+
+ # Create an allowmask element
+ maskText = getNodeText(inDoc, "allowmask")
- # Create an allowmask element (doesn't exist in pydump output if public)
- maskText = getNodeText(inDoc, "allowmask")
+ if(maskText != ""):
+ appendTextNode(outDoc, entry, "allowmask", maskText)
+ else:
+ appendTextNode(outDoc, entry, "allowmask", "0")
+ else:
+ print("including private entry: L-%s" % id)
- if(maskText != ""):
- appendTextNode(outDoc, entry, "allowmask", maskText)
- else:
- appendTextNode(outDoc, entry, "allowmask", "0")
+ appendTextNode(outDoc, entry, "security", security)
# Create a taglist element
appendTextNode(outDoc, entry, "taglist", getNodeText(inDoc, "taglist"))
# with it
addCommentsForId(outDoc, entry, username, id)
+ element.appendChild(entry)
+
def addCommentsForId(outDoc, entry, username, id):
try:
commentFile = open("%s/C-%s" % (username,id), "r")
getNodeText(comment, "subject"))
# Create an event element
- appendTextNode(outDoc, outComment, "event",
- getNodeText(comment, "body"))
+ bodyText = getNodeText(comment, "body")
+ appendTextNode(outDoc, outComment, "event", replaceLJTags(bodyText))
# Create the author element
author = outDoc.createElement("author")
appendTextNode(outDoc, outComment, "parent_itemid", parentId)
# Regular expressions used in replaceLJTags().
# (Kept at module level so they are compiled once and can be reused -
# suggestion by jparise.)
userRE = re.compile('<lj user="(.*?)" ?/?>', re.IGNORECASE)
commRE = re.compile('<lj comm="(.*?)" ?/?>', re.IGNORECASE)
namedCutRE = re.compile('<lj-cut +text="(.*?)" ?/?>',
                        re.IGNORECASE|re.DOTALL)
# The opening and closing cut tags need distinct names: binding both patterns
# to 'cutRE' made the second assignment overwrite the first, so '<lj-cut>'
# was never converted and '</lj-cut>' was turned into a "more" marker.
cutRE = re.compile('<lj-cut>', re.IGNORECASE)
cutEndRE = re.compile('</lj-cut>', re.IGNORECASE)
embedRE = re.compile('<lj-embed id="[0-9]+">', re.IGNORECASE)


def replaceLJTags(entry):
    """Return `entry` with LiveJournal-specific markup rewritten as HTML.

    - <lj user="NAME">       becomes a link to the user's journal
    - <lj comm="NAME">       becomes a link to the community
    - <lj-cut text="TEXT">   becomes <!--more TEXT-->
    - <lj-cut>               becomes <!--more-->
    - </lj-cut>              is removed

    <lj-embed> tags are left untouched (see the note in the body).
    """
    rv = entry

    # replace lj user tags
    rv = userRE.sub('<a href="http://www.livejournal.com/users/\\1" class="lj-user">\\1</a>', rv)

    # replace lj comm tags
    rv = commRE.sub('<a href="http://community.livejournal.com/\\1/" class="lj-comm">\\1</a>', rv)

    # replace lj-cut tags; the named form must be handled before the bare
    # form, and the closing tag is simply dropped
    rv = namedCutRE.sub('<!--more \\1-->', rv)
    rv = cutRE.sub('<!--more-->', rv)
    rv = cutEndRE.sub('', rv)

    # replace lj-embed tags
    # this doesn't actually work.  LJ doesn't include the embedded content
    # when ljdump calls 'getevents', but instead includes an lj-embed tag
    # with an id and nothing else.
    #rv = embedRE.sub('', rv)

    return rv
-# Create the minidom document
-outDoc = xml.dom.minidom.Document()
def usage():
    """Print the command-line help text for convertdump.py to stdout."""
    print( "Usage: convertdump.py [arguments]" )
    print( """
This will convert a pydump archive into something compatible with the
WordPress LiveJournal importer. This is the same format used by the Windows
ljArchive exporter.

Arguments:
    -u  --user      username of archive to process [required]
    -l  --limit     limit the number of entries in each xml file (default 250)
    -i  --insecure  include private and protected entries in the output
    -h  --help      show this help page

Example:
    ./convertdump.py --user stevemartin --limit 200 --insecure
""")
-# start processing entries
-for entry in entryArray:
- addEntryForId(outDoc, "grahams", entry)
- currentFileEntry += 1
def _newOutputDocument():
    """Return (document, root) for a fresh, empty <livejournal> XML document."""
    doc = xml.dom.minidom.Document()
    root = doc.createElement("livejournal")
    doc.appendChild(root)
    return doc, root


def main(argv):
    """Convert the pydump archive of one user into ljArchive-style XML files.

    argv is the command-line argument list WITHOUT the program name (the
    script entry point passes sys.argv[1:]).

    The archive is read from a directory named after the user; every file
    whose name starts with "L-" is treated as an entry.  Entries are
    processed in ascending numeric order and written out in chunks of at
    most --limit entries to "<username> - <id>.xml", where <id> is the id of
    the last entry handled for that chunk.

    Exits with status 2 when the arguments are missing or invalid, and with
    status 0 after printing the help text for -h/--help.
    """
    username = ""
    entryLimit = 250
    includeSecure = False

    if not argv:
        usage()
        sys.exit(2)

    try:
        # Parse the list we were given rather than re-reading sys.argv, so
        # that main() honours its own parameter.
        opts, _ = getopt.getopt(argv, "hu:l:i", ["help",
                                                 "user=",
                                                 "limit=",
                                                 "insecure"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-u", "--user"):
            username = a
        elif o in ("-l", "--limit"):
            try:
                entryLimit = int(a)
            except ValueError:
                print("Invalid value for --limit: %s" % a)
                usage()
                sys.exit(2)
        elif o in ("-i", "--insecure"):
            print( "Warning: Including secure entries in XML output" )
            includeSecure = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # getopt only returns options named in the spec above, so this
            # is a programming error, not a user error.
            raise AssertionError("unhandled option")

    # --user is documented as required; without it os.listdir("") would
    # fail with an unhelpful OSError.
    if not username:
        print("Error: no user specified")
        usage()
        sys.exit(2)

    # get the (numerically sorted) list of entry ids
    entryArray = sorted(int(name.replace("L-", ""))
                        for name in os.listdir(username)
                        if name.startswith("L-"))

    outDoc, ljElement = _newOutputDocument()
    currentFileEntry = 0

    # start processing entries
    for entry in entryArray:
        # NOTE: addEntryForId() may decline to add a secure entry, but the
        # entry still counts toward the per-file limit here.
        addEntryForId(outDoc, ljElement, username, entry, includeSecure)

        currentFileEntry += 1

        # flush when the chunk is full, and always after the final entry
        if currentFileEntry == entryLimit or entry == entryArray[-1]:
            # toxml() with an encoding yields encoded bytes, so write them
            # through a binary handle; 'with' guarantees the file is closed.
            with open("%s - %s.xml" % (username, entry), "wb") as f:
                f.write(outDoc.toxml("UTF-8"))

            currentFileEntry = 0

            # start a fresh document for the next chunk
            outDoc, ljElement = _newOutputDocument()


if __name__ == "__main__":
    main(sys.argv[1:])