]> wagner.pp.ru Git - oss/ljdump.git/blobdiff - convertdump.py
mostly working now. need to unhardcode username and breakup limit
[oss/ljdump.git] / convertdump.py
index 6f394ce7433093d47b625d4520d02b773b803d9e..9a796cbbf873fa8c519a6f567f622503bc7626a2 100755 (executable)
@@ -1,11 +1,17 @@
 #!/usr/bin/python
 
 import xml.dom.minidom 
+import os
+import codecs
+from time import strptime, strftime
 
 def getNodeText(doc, nodename):
     rc = ""
 
-    nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    try:
+        nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    except:
+        return ""
 
     for node in nodelist:
         if node.nodeType == node.TEXT_NODE:
@@ -14,13 +20,25 @@ def getNodeText(doc, nodename):
     return rc
 
 def appendTextNode(doc, parent, nodename, value):
+    """Append a <nodename> element to parent, with value as text if non-empty."""
+    nodeValue = value
+    # Fall back to cp1252 bytes re-read as UTF-8 when the UTF-8 encode raises.
+    try:
+        bytes = nodeValue.encode("UTF-8")
+    except:
+        bytes = nodeValue.encode("cp1252")
+        nodeValue = unicode(bytes, "UTF-8")
+
     element = doc.createElement(nodename)
-    textNode = doc.createTextNode(value)
-    element.appendChild(textNode)
+    # an empty value gets no text node, so the element stays childless
+    if( nodeValue != "" ): 
+        textNode = doc.createTextNode(nodeValue)
+        element.appendChild(textNode)
+
     parent.appendChild(element)
 
 
-def addEntryForID(doc, username, id):
+def addEntryForId(outDoc, username, id):
     entryFile = open("%s/L-%s" % (username,id), "r")
     inDoc = xml.dom.minidom.parse(entryFile)
 
@@ -43,10 +61,11 @@ def addEntryForID(doc, username, id):
     appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
 
     # Create an allowmask element (doesn't exist in pydump output if public)
-    try:
-        appendTextNode(outDoc, entry, "allowmask", 
-            getNodeText(inDoc, "allowmask"))
-    except:
+    maskText = getNodeText(inDoc, "allowmask")
+
+    if(maskText != ""):
+        appendTextNode(outDoc, entry, "allowmask", maskText)
+    else:
         appendTextNode(outDoc, entry, "allowmask", "0")
 
     # Create a taglist element
@@ -54,8 +73,81 @@ def addEntryForID(doc, username, id):
 
     # XXXSMG: make sure there is a comment file before trying to do anything
     # with it
-    commentFile = open("%s/C-%s" % (username,id), "r")
-    
+    addCommentsForId(outDoc, entry, username, id)
+
+def addCommentsForId(outDoc, entry, username, id):
+    """Append one <comment> child to entry per comment in <username>/C-<id>.
+
+    Returns without adding anything when the comment file cannot be opened.
+    """
+    try:
+        commentFile = open("%s/C-%s" % (username,id), "r")
+    except IOError:  # there are no comments for this entry
+        return
+
+    # close the file even if the XML fails to parse
+    try:
+        inDoc = xml.dom.minidom.parse(commentFile)
+    finally:
+        commentFile.close()
+
+    for comment in inDoc.getElementsByTagName("comment"):
+        outComment = outDoc.createElement("comment")
+        entry.appendChild(outComment)
+
+        # add the item id for the comment
+        appendTextNode(outDoc, outComment, "itemid",
+            getNodeText(comment, "id"))
+
+        # convert the time string; leave <eventtime> empty if there is none
+        timeString = getNodeText(comment, "date")
+        if( timeString != "" ):
+            inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
+            outDate = strftime("%Y-%m-%d %H:%M:%S", inDate)
+            appendTextNode(outDoc, outComment, "eventtime", outDate)
+        else:
+            outComment.appendChild(outDoc.createElement("eventtime"))
+
+        # Create a subject element
+        appendTextNode(outDoc, outComment, "subject",
+            getNodeText(comment, "subject"))
+        # Create an event element
+        appendTextNode(outDoc, outComment, "event",
+            getNodeText(comment, "body"))
+
+        # Create the author element.  getNodeText returns "" instead of
+        # raising when <user> is absent, so fall back on the empty string.
+        author = outDoc.createElement("author")
+        outComment.appendChild(author)
+        cUser = getNodeText(comment, "user") or "anonymous"
+        appendTextNode(outDoc, author, "name", cUser)
+        appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")
+
+        # Create the parent_itemid
+        parentId = getNodeText(comment, "parentid")
+        if(parentId != ""):
+            appendTextNode(outDoc, outComment, "parent_itemid", parentId)
+
+
+
+
+# Scan the journal directory for entry files, which are named "L-<id>".
+userDir = os.listdir("grahams")
+
+# get the list of entries, as integers so they sort numerically
+entryArray = []
+for file in userDir:
+    if not file.startswith("L-"):
+        continue
+    entryNum = int(file.replace("L-",""))
+    entryArray.append(entryNum)
+entryArray.sort()
+
+# highest entry id found, or -1 when the directory holds no entries
+highNum = -1
+if entryArray:
+    highNum = entryArray[-1]
+
 
 # Create the minidom document
 outDoc = xml.dom.minidom.Document()
@@ -64,7 +156,26 @@ outDoc = xml.dom.minidom.Document()
 ljElement = outDoc.createElement("livejournal")
 outDoc.appendChild(ljElement)
 
-addEntryForID(outDoc, "grahams", "2583")
+breakup = 250
+currentFileEntry = 0
+
+# convert every entry, writing one XML file per batch of `breakup` entries
+for index, entry in enumerate(entryArray):
+    addEntryForId(outDoc, "grahams", entry)
+
+    currentFileEntry += 1
+
+    # flush on a full batch, and once more for the final partial batch
+    if( currentFileEntry == breakup or index == len(entryArray) - 1 ):
+        f = open("grahams - %s.xml" % entry, "w")
+        tempXML = outDoc.toxml("UTF-8")
+        try:
+            f.write(tempXML)
+        finally:
+            f.close()
+        currentFileEntry = 0
+        outDoc = xml.dom.minidom.Document()
 
-# Print our newly created XML
-print outDoc.toprettyxml(indent="  ")
+        # Create the <livejournal> base element
+        ljElement = outDoc.createElement("livejournal")
+        outDoc.appendChild(ljElement)