X-Git-Url: https://wagner.pp.ru/gitweb/?a=blobdiff_plain;f=convertdump.py;h=9a796cbbf873fa8c519a6f567f622503bc7626a2;hb=4e30b2386cd897f861f118ace0de79cb058496a4;hp=6f394ce7433093d47b625d4520d02b773b803d9e;hpb=f18d217d036f861073374788434be94779c87f90;p=oss%2Fljdump.git diff --git a/convertdump.py b/convertdump.py index 6f394ce..9a796cb 100755 --- a/convertdump.py +++ b/convertdump.py @@ -1,11 +1,17 @@ #!/usr/bin/python import xml.dom.minidom +import os +import codecs +from time import strptime, strftime def getNodeText(doc, nodename): rc = "" - nodelist = doc.getElementsByTagName(nodename)[0].childNodes + try: + nodelist = doc.getElementsByTagName(nodename)[0].childNodes + except: + return "" for node in nodelist: if node.nodeType == node.TEXT_NODE: @@ -14,13 +20,25 @@ def getNodeText(doc, nodename): return rc def appendTextNode(doc, parent, nodename, value): + nodeValue = value + + # make sure value is properly encoded + try: + bytes = nodeValue.encode("UTF-8") + except: + bytes = nodeValue.encode("cp1252") + nodeValue = unicode(bytes, "UTF-8") + element = doc.createElement(nodename) - textNode = doc.createTextNode(value) - element.appendChild(textNode) + + if( nodeValue != "" ): + textNode = doc.createTextNode(nodeValue) + element.appendChild(textNode) + parent.appendChild(element) -def addEntryForID(doc, username, id): +def addEntryForId(outDoc, username, id): entryFile = open("%s/L-%s" % (username,id), "r") inDoc = xml.dom.minidom.parse(entryFile) @@ -43,10 +61,11 @@ def addEntryForID(doc, username, id): appendTextNode(outDoc, entry, "event", getNodeText(event, "event")) # Create an allowmask element (doesn't exist in pydump output if public) - try: - appendTextNode(outDoc, entry, "allowmask", - getNodeText(inDoc, "allowmask")) - except: + maskText = getNodeText(inDoc, "allowmask") + + if(maskText != ""): + appendTextNode(outDoc, entry, "allowmask", maskText) + else: appendTextNode(outDoc, entry, "allowmask", "0") # Create a taglist element @@ -54,8 +73,81 @@ def addEntryForID(doc, username, id): # XXXSMG: make sure there is a comment file before trying to do anything # with it - commentFile = open("%s/C-%s" % (username,id), "r") - + addCommentsForId(outDoc, entry, username, id) + +def addCommentsForId(outDoc, entry, username, id): + try: + commentFile = open("%s/C-%s" % (username,id), "r") + except IOError: # there are no comments for this entry + return + + inDoc = xml.dom.minidom.parse(commentFile) + + comments = inDoc.getElementsByTagName("comment") + + for comment in comments: + outComment = outDoc.createElement("comment") + entry.appendChild(outComment) + + # add the item id for the comment + appendTextNode(outDoc, outComment, "itemid", + getNodeText(comment, "id")) + + # convert the time string + timeString = getNodeText(comment, "date") + if( timeString != "" ): + inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ") + outDate = strftime("%Y-%m-%d %H:%M:%S", inDate) + appendTextNode(outDoc, outComment, "eventtime", outDate) + else: + emptyTime = outDoc.createElement("eventtime") + outComment.appendChild(emptyTime) + + # Create an subject element + appendTextNode(outDoc, outComment, "subject", + getNodeText(comment, "subject")) + + # Create an event element + appendTextNode(outDoc, outComment, "event", + getNodeText(comment, "body")) + + # Create the author element + author = outDoc.createElement("author") + outComment.appendChild(author) + + try: + cUser = getNodeText(comment, "user") + except: + cUser = "anonymous" + + appendTextNode(outDoc, author, "name", cUser) + appendTextNode(outDoc, author, "email", cUser + "@livejournal.com") + + # Create the parent_itemid + parentId = getNodeText(comment, "parentid") + if(parentId != ""): + appendTextNode(outDoc, outComment, "parent_itemid", parentId) + + + + +userDir = os.listdir("grahams") + +highNum = -1 +entryArray = [] + +# get the list of entries +for file in userDir: + if file.startswith("L-"): + entryNum = int(file.replace("L-","")) + + entryArray.append(entryNum) + + if( highNum < entryNum ): + highNum = entryNum + +entryArray.sort() + # Create the minidom document outDoc = xml.dom.minidom.Document() @@ -64,7 +156,26 @@ outDoc = xml.dom.minidom.Document() ljElement = outDoc.createElement("livejournal") outDoc.appendChild(ljElement) -addEntryForID(outDoc, "grahams", "2583") +breakup = 250 +currentFileEntry = 0 + +# start processing entries +for entry in entryArray: + addEntryForId(outDoc, "grahams", entry) + + currentFileEntry += 1 + + if( currentFileEntry == breakup ): + + f = open("grahams - %s.xml" % entry, "w") + tempXML = outDoc.toxml("UTF-8") + f.write(tempXML) + + currentFileEntry = 0 + + # Create the minidom document + outDoc = xml.dom.minidom.Document() -# Print our newly created XML -print outDoc.toprettyxml(indent=" ") + # Create the base element + ljElement = outDoc.createElement("livejournal") + outDoc.appendChild(ljElement)