#!/usr/bin/python
+# Copyright 2009, Sean M. Graham (www.sean-graham.com)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
import xml.dom.minidom
+import os
+import codecs
+import sys
+import getopt
+
+from time import strptime, strftime
def getNodeText(doc, nodename):
rc = ""
- nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+ try:
+ nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+ except:
+ return ""
for node in nodelist:
if node.nodeType == node.TEXT_NODE:
return rc
def appendTextNode(doc, parent, nodename, value):
+ nodeValue = value
+
+ # make sure value is properly encoded
+ try:
+ bytes = nodeValue.encode("UTF-8")
+ except:
+ bytes = nodeValue.encode("cp1252")
+ nodeValue = unicode(bytes, "UTF-8")
+
element = doc.createElement(nodename)
- textNode = doc.createTextNode(value)
- element.appendChild(textNode)
+
+ if( nodeValue != "" ):
+ textNode = doc.createTextNode(nodeValue)
+ element.appendChild(textNode)
+
parent.appendChild(element)
-def addEntryForID(doc, username, id):
+def addEntryForId(outDoc, element, username, id, includeSecure):
entryFile = open("%s/L-%s" % (username,id), "r")
inDoc = xml.dom.minidom.parse(entryFile)
# Create an entry element
entry = outDoc.createElement("entry")
- ljElement.appendChild(entry)
# Create an itemid element
appendTextNode(outDoc, entry, "itemid", getNodeText(inDoc,"itemid"))
event = inDoc.getElementsByTagName("event")[0]
appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
- # Create an allowmask element (doesn't exist in pydump output if public)
- try:
- appendTextNode(outDoc, entry, "allowmask",
- getNodeText(inDoc, "allowmask"))
- except:
- appendTextNode(outDoc, entry, "allowmask", "0")
+ security = getNodeText(inDoc, "security")
+
+ if(security != ""):
+ # don't append this entry unless the user provided the argument
+ if(includeSecure == False):
+ print("omitting secure entry: L-%s" % id)
+ return
+ else:
+ if(security == "usemask"):
+ print("including allowmask entry: L-%s" % id)
+
+ # Create an allowmask element
+ maskText = getNodeText(inDoc, "allowmask")
+
+ if(maskText != ""):
+ appendTextNode(outDoc, entry, "allowmask", maskText)
+ else:
+ appendTextNode(outDoc, entry, "allowmask", "0")
+ else:
+ print("including private entry: L-%s" % id)
+
+ appendTextNode(outDoc, entry, "security", security)
# Create a taglist element
appendTextNode(outDoc, entry, "taglist", getNodeText(inDoc, "taglist"))
# XXXSMG: make sure there is a comment file before trying to do anything
# with it
- commentFile = open("%s/C-%s" % (username,id), "r")
-
+ addCommentsForId(outDoc, entry, username, id)
+
+ element.appendChild(entry)
+
+def addCommentsForId(outDoc, entry, username, id):
+ try:
+ commentFile = open("%s/C-%s" % (username,id), "r")
+ except IOError: # there are no comments for this entry
+ return
+
+ inDoc = xml.dom.minidom.parse(commentFile)
+
+ comments = inDoc.getElementsByTagName("comment")
+
+ for comment in comments:
+ outComment = outDoc.createElement("comment")
+ entry.appendChild(outComment)
+
+ # add the item id for the comment
+ appendTextNode(outDoc, outComment, "itemid",
+ getNodeText(comment, "id"))
+
+ # convert the time string
+ timeString = getNodeText(comment, "date")
+ if( timeString != "" ):
+ inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
+ outDate = strftime("%Y-%m-%d %H:%M:%S", inDate)
+ appendTextNode(outDoc, outComment, "eventtime", outDate)
+ else:
+ emptyTime = outDoc.createElement("eventtime")
+ outComment.appendChild(emptyTime)
+
+ # Create an subject element
+ appendTextNode(outDoc, outComment, "subject",
+ getNodeText(comment, "subject"))
+
+ # Create an event element
+ appendTextNode(outDoc, outComment, "event",
+ getNodeText(comment, "body"))
+
+ # Create the author element
+ author = outDoc.createElement("author")
+ outComment.appendChild(author)
+
+ try:
+ cUser = getNodeText(comment, "user")
+ except:
+ cUser = "anonymous"
+
+ appendTextNode(outDoc, author, "name", cUser)
+ appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")
+
+ # Create the parent_itemid
+ parentId = getNodeText(comment, "parentid")
+ if(parentId != ""):
+ appendTextNode(outDoc, outComment, "parent_itemid", parentId)
+
+def usage():
+ print( "Usage: convertdump.py [arguments]" )
+ print( """
+This will convert a pydump archive into something compatible with the
+WordPress LiveJournal importer. This is the same format used by the Windows
+ljArchive exporter.
+
+Arguments:
+ -u --user username of archive to process [required]
+ -l --limit limit the number of entries in each xml file (default 250)
+ -i --insecure include private and protected entries in the output
+ -h --help show this help page
+
+Example:
+ ./convertdump.py --user stevemartin --limit 200 --insecure
+""")
+
+
+def main(argv):
+ username = ""
+ entryLimit = 250
+ includeSecure = False;
+
+ if( len(argv) == 0 ):
+ usage()
+ sys.exit(2)
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "hu:l:i", ["help",
+ "user=",
+ "limit=",
+ "insecure"])
+ except getopt.GetoptError, err:
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(2)
+
+ for o, a in opts:
+ if o == "-v":
+ verbose = True
+ elif o in ("-u", "--user"):
+ username = a
+ elif o in ("-l", "--limit"):
+ entryLimit = int(a)
+ elif o in ("-i", "--insecure"):
+ print( "Warning: Including secure entries in XML output" )
+ includeSecure = True
+ elif o in ("-h", "--help"):
+ usage()
+ sys.exit()
+ else:
+ assert False, "unhandled option"
+
+ userDir = os.listdir(username)
+
+ highNum = -1
+ entryArray = []
+
+ # get the list of entries
+ for file in userDir:
+ if file.startswith("L-"):
+ entryNum = int(file.replace("L-",""))
+
+ entryArray.append(entryNum)
+
+ if( highNum < entryNum ):
+ highNum = entryNum
+
+ entryArray.sort()
+
+ # Create the minidom document
+ outDoc = xml.dom.minidom.Document()
+
+ # Create the <livejournal> base element
+ ljElement = outDoc.createElement("livejournal")
+ outDoc.appendChild(ljElement)
+
+ currentFileEntry = 0
+
+ # start processing entries
+ for entry in entryArray:
+ addEntryForId(outDoc, ljElement, username, entry, includeSecure)
+
+ currentFileEntry += 1
+
+ if( currentFileEntry == entryLimit or entry == entryArray[-1] ):
+
+ f = open("%s - %s.xml" % (username, entry), "w")
+ tempXML = outDoc.toxml("UTF-8")
+ f.write(tempXML)
+
+ currentFileEntry = 0
-# Create the minidom document
-outDoc = xml.dom.minidom.Document()
+ # Create the minidom document
+ outDoc = xml.dom.minidom.Document()
-# Create the <livejournal> base element
-ljElement = outDoc.createElement("livejournal")
-outDoc.appendChild(ljElement)
+ # Create the <livejournal> base element
+ ljElement = outDoc.createElement("livejournal")
+ outDoc.appendChild(ljElement)
-addEntryForID(outDoc, "grahams", "2583")
+if __name__ == "__main__":
+ main(sys.argv[1:])
-# Print our newly created XML
-print outDoc.toprettyxml(indent=" ")