wagner.pp.ru Git - oss/ljdump.git/blobdiff - convertdump.py
added command line arguments and some comments and warnings
[oss/ljdump.git] / convertdump.py
index 6f394ce7433093d47b625d4520d02b773b803d9e..106b274d796e8d918882249d95f4b2ea2de18686 100755 (executable)
@@ -1,11 +1,19 @@
 #!/usr/bin/python
 
 import xml.dom.minidom 
+import os
+import codecs
+import sys
+
+from time import strptime, strftime
 
 def getNodeText(doc, nodename):
     rc = ""
 
-    nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    try:
+        nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    except:
+        return ""
 
     for node in nodelist:
         if node.nodeType == node.TEXT_NODE:
@@ -14,19 +22,31 @@ def getNodeText(doc, nodename):
     return rc
 
 def appendTextNode(doc, parent, nodename, value):
+    """Append a <nodename> element to parent, with value as its text if non-empty."""
+    nodeValue = value
+    # Probe that the value encodes as UTF-8; on failure, re-read its cp1252
+    # bytes as UTF-8.  NOTE(review): bare except hides unrelated errors too.
+    try:
+        bytes = nodeValue.encode("UTF-8")
+    except:
+        bytes = nodeValue.encode("cp1252")
+        nodeValue = unicode(bytes, "UTF-8")
     element = doc.createElement(nodename)
-    textNode = doc.createTextNode(value)
-    element.appendChild(textNode)
+    # an empty value still produces the element, just without a text child
+    if( nodeValue != "" ): 
+        textNode = doc.createTextNode(nodeValue)
+        element.appendChild(textNode)
+
     parent.appendChild(element)
 
 
-def addEntryForID(doc, username, id):
+def addEntryForId(outDoc, element, username, id):
     entryFile = open("%s/L-%s" % (username,id), "r")
     inDoc = xml.dom.minidom.parse(entryFile)
 
     # Create an entry element
     entry = outDoc.createElement("entry")
-    ljElement.appendChild(entry)
+    element.appendChild(entry)
 
     # Create an itemid element
     appendTextNode(outDoc, entry, "itemid", getNodeText(inDoc,"itemid"))
@@ -43,10 +63,13 @@ def addEntryForID(doc, username, id):
     appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
 
     # Create an allowmask element (doesn't exist in pydump output if public)
-    try:
-        appendTextNode(outDoc, entry, "allowmask", 
-            getNodeText(inDoc, "allowmask"))
-    except:
+    maskText = getNodeText(inDoc, "allowmask")
+
+    # XXXSMG: consult L-1411 and L-976 for examples of security and
+    # allowmask use
+    if(maskText != ""):
+        appendTextNode(outDoc, entry, "allowmask", maskText)
+    else:
         appendTextNode(outDoc, entry, "allowmask", "0")
 
     # Create a taglist element
@@ -54,17 +77,122 @@ def addEntryForID(doc, username, id):
 
     # XXXSMG: make sure there is a comment file before trying to do anything
     # with it
-    commentFile = open("%s/C-%s" % (username,id), "r")
+    addCommentsForId(outDoc, entry, username, id)
+
+def addCommentsForId(outDoc, entry, username, id):
+    """Append a <comment> element to `entry` for each comment on entry `id`.
+
+    Reads "<username>/C-<id>"; if it cannot be opened, nothing is appended.
+    """
+    try:
+        commentFile = open("%s/C-%s" % (username,id), "r")
+    except IOError:  # there are no comments for this entry
+        return
+
+    try:
+        inDoc = xml.dom.minidom.parse(commentFile)
+    finally:
+        commentFile.close()  # release the handle even if parsing fails
+
+    for comment in inDoc.getElementsByTagName("comment"):
+        outComment = outDoc.createElement("comment")
+        entry.appendChild(outComment)
+
+        # add the item id for the comment
+        appendTextNode(outDoc, outComment, "itemid",
+            getNodeText(comment, "id"))
+
+        # reformat the date; an empty date still yields an empty <eventtime>
+        timeString = getNodeText(comment, "date")
+        if timeString != "":
+            inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
+            timeString = strftime("%Y-%m-%d %H:%M:%S", inDate)
+        appendTextNode(outDoc, outComment, "eventtime", timeString)
+
+        appendTextNode(outDoc, outComment, "subject",
+            getNodeText(comment, "subject"))
+        # the comment's <body> becomes the <event> text
+        appendTextNode(outDoc, outComment, "event",
+            getNodeText(comment, "body"))
+
+        # Create the author element
+        author = outDoc.createElement("author")
+        outComment.appendChild(author)
+
+        # getNodeText returns "" instead of raising when <user> is absent,
+        # so compare against "" to fall back to the anonymous author
+        cUser = getNodeText(comment, "user")
+        if cUser == "":
+            cUser = "anonymous"
+        appendTextNode(outDoc, author, "name", cUser)
+        appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")
+
+        # Create the parent_itemid
+        parentId = getNodeText(comment, "parentid")
+        if parentId != "":
+            appendTextNode(outDoc, outComment, "parent_itemid", parentId)
+
+def main(argv):
+    """Convert a user's ljdump entries into batched <livejournal> XML files.
+
+    argv is [username, entrylimit]: the directory holding the "L-<n>" and
+    "C-<n>" dump files, and the maximum number of entries per output file.
+    Each batch is written to "<username> - <n>.xml", where <n> is the id of
+    the last entry in the batch.  Prints a usage message and returns unless
+    argv has exactly two items; a non-integer entrylimit raises ValueError.
+    """
+    if len(argv) != 2:
+        print( "Usage: convertdump.py <username> <entrylimit>" )
+        return
     
 
-# Create the minidom document
-outDoc = xml.dom.minidom.Document()
+    username = argv[0]
+    # the batch size comes from the command line and must not be overridden
+    entryLimit = int(argv[1])
+
+    # collect the numeric ids of all "L-<n>" entry files, in ascending order
+    entryArray = []
+    for name in os.listdir(username):
+        if name.startswith("L-"):
+            entryArray.append(int(name.replace("L-", "")))
+    entryArray.sort()
+
+    outDoc, ljElement = _newDocument()
+    currentFileEntry = 0
+
+    # start processing entries
+    for entry in entryArray:
+        addEntryForId(outDoc, ljElement, username, entry)
+        currentFileEntry += 1
+
+        # batch is full: write it out and start a fresh document
+        if currentFileEntry == entryLimit:
+            _writeDocument(outDoc, username, entry)
+            outDoc, ljElement = _newDocument()
+            currentFileEntry = 0
 
-# Create the <livejournal> base element
-ljElement = outDoc.createElement("livejournal")
-outDoc.appendChild(ljElement)
+    # write the last, partially filled batch; entries after the final full
+    # batch (or all of them, if fewer than entryLimit) would otherwise be lost
+    if currentFileEntry > 0:
+        _writeDocument(outDoc, username, entryArray[-1])
+
+
+def _newDocument():
+    """Return (document, root) for an empty <livejournal> minidom document."""
+    outDoc = xml.dom.minidom.Document()
+    ljElement = outDoc.createElement("livejournal")
+    outDoc.appendChild(ljElement)
+    return outDoc, ljElement
+
+
+def _writeDocument(outDoc, username, lastEntry):
+    """Serialize outDoc as UTF-8 into "<username> - <lastEntry>.xml"."""
+    f = open("%s - %s.xml" % (username, lastEntry), "w")
+    try:
+        f.write(outDoc.toxml("UTF-8"))
+    finally:
+        f.close()  # flush to disk and release the handle
 
-addEntryForID(outDoc, "grahams", "2583")
+if __name__ == "__main__":
+    main(sys.argv[1:])  # skip argv[0], the script path
 
-# Print our newly created XML
-print outDoc.toprettyxml(indent="  ")