wagner.pp.ru Git - oss/ljdump.git/commitdiff
builds one big file now of all entries and comments for the user
author: Sean M. Graham <grahams@milgrim.local>
Sun, 25 Jan 2009 01:59:42 +0000 (20:59 -0500)
committer: Sean M. Graham <grahams@milgrim.local>
Sun, 25 Jan 2009 01:59:42 +0000 (20:59 -0500)
convertdump.py

index 6f394ce7433093d47b625d4520d02b773b803d9e..f19be5f8243335c4ad1fa064b869a941be9a8cd1 100755 (executable)
@@ -1,11 +1,16 @@
 #!/usr/bin/python
 
 import xml.dom.minidom 
+import os
+from time import strptime, strftime
 
 def getNodeText(doc, nodename):
     rc = ""
 
-    nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    try:
+        nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    except:
+        return ""
 
     for node in nodelist:
         if node.nodeType == node.TEXT_NODE:
@@ -15,12 +20,15 @@ def getNodeText(doc, nodename):
 
 def appendTextNode(doc, parent, nodename, value):
     element = doc.createElement(nodename)
-    textNode = doc.createTextNode(value)
-    element.appendChild(textNode)
+
+    if( value != "" ): 
+        textNode = doc.createTextNode(value)
+        element.appendChild(textNode)
+
     parent.appendChild(element)
 
 
-def addEntryForID(doc, username, id):
+def addEntryForId(outDoc, username, id):
     entryFile = open("%s/L-%s" % (username,id), "r")
     inDoc = xml.dom.minidom.parse(entryFile)
 
@@ -54,8 +62,64 @@ def addEntryForID(doc, username, id):
 
     # XXXSMG: make sure there is a comment file before trying to do anything
     # with it
-    commentFile = open("%s/C-%s" % (username,id), "r")
-    
+    addCommentsForId(outDoc, entry, username, id)
+
+def addCommentsForId(outDoc, entry, username, id):
+    """Append a <comment> element to entry for each comment on entry id.
+
+    Comments are read from "<username>/C-<id>"; if that file cannot be
+    opened the entry is treated as having no comments and left untouched.
+    """
+    try:
+        commentFile = open("%s/C-%s" % (username, id), "r")
+    except IOError:
+        # there are no comments for this entry
+        return
+
+    # parse inside try/finally so the file handle is always released
+    try:
+        inDoc = xml.dom.minidom.parse(commentFile)
+    finally:
+        commentFile.close()
+
+    for comment in inDoc.getElementsByTagName("comment"):
+        outComment = outDoc.createElement("comment")
+        entry.appendChild(outComment)
+
+        # add the item id for the comment
+        appendTextNode(outDoc, outComment, "itemid",
+            getNodeText(comment, "id"))
+
+        # convert the time string; a missing date gives an empty eventtime
+        outDate = getNodeText(comment, "date")
+        if outDate != "":
+            inDate = strptime(outDate, "%Y-%m-%dT%H:%M:%SZ")
+            outDate = strftime("%Y-%m-%d %H:%M:%S", inDate)
+        appendTextNode(outDoc, outComment, "eventtime", outDate)
+
+        # Create a subject element
+        appendTextNode(outDoc, outComment, "subject",
+            getNodeText(comment, "subject"))
+
+        # Create an event element
+        appendTextNode(outDoc, outComment, "event",
+            getNodeText(comment, "body"))
+
+        # Create the author element
+        author = outDoc.createElement("author")
+        outComment.appendChild(author)
+
+        # getNodeText returns "" (it does not raise) when <user> is
+        # missing, so test for the empty string to detect anonymous posts
+        cUser = getNodeText(comment, "user") or "anonymous"
+        appendTextNode(outDoc, author, "name", cUser)
+        appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")
+
+        # Create the parent_itemid
+        parentId = getNodeText(comment, "parentid")
+        if parentId != "":
+            appendTextNode(outDoc, outComment, "parent_itemid", parentId)
+
 
 # Create the minidom document
 outDoc = xml.dom.minidom.Document()
@@ -64,7 +128,28 @@ outDoc = xml.dom.minidom.Document()
 ljElement = outDoc.createElement("livejournal")
 outDoc.appendChild(ljElement)
 
-addEntryForID(outDoc, "grahams", "2583")
+import sys
+
+userDir = os.listdir("grahams")
+entryArray = []
+
+# get the list of entries, skipping names whose suffix is not a number
+for fileName in userDir:
+    if fileName.startswith("L-") and fileName[2:].isdigit():
+        entryArray.append(int(fileName[2:]))
+
+entryArray.sort()
+
+# highest entry number seen, or -1 when there are no entries
+highNum = -1
+if entryArray:
+    highNum = entryArray[-1]
+
+# start processing entries; progress goes to stderr so it does not get
+# mixed into the XML that is printed to stdout below
+for entry in entryArray:
+    sys.stderr.write("%s\n" % entry)
+    addEntryForId(outDoc, "grahams", entry)
 
 # Print our newly created XML
 print outDoc.toprettyxml(indent="  ")