8 from time import strptime, strftime
# Collects text from the first <nodename> element found under `doc`.
# NOTE(review): this excerpt omits several lines of the function (the loop
# header, the accumulation and the return are not visible). Presumably the
# result is the concatenated text as a string, since callers compare it
# against ""; confirm against the full file.
10 def getNodeText(doc, nodename):
# Only the first matching element is consulted; its direct children are scanned.
14 nodelist = doc.getElementsByTagName(nodename)[0].childNodes
# Only text nodes pass this filter.
19 if node.nodeType == node.TEXT_NODE:
# Creates a <nodename> element under `parent`; a text child is attached only
# when the value is a non-empty string.
#   doc    -- document used as the factory for the new element and text node
#   parent -- node that receives the new element
# NOTE(review): the assignment of `nodeValue` (presumably from `value`) and the
# try/except that selects between the two encode calls are not visible in this
# excerpt; confirm against the full file.
24 def appendTextNode(doc, parent, nodename, value):
27 # make sure value is properly encoded
# First attempt: encode as UTF-8.
29 bytes = nodeValue.encode("UTF-8")
# Alternative: encode as cp1252, then rebuild the string by decoding those
# bytes as UTF-8. `unicode` is the Python 2 builtin, so this code targets Python 2.
# NOTE(review): the local name `bytes` shadows the builtin of the same name.
31 bytes = nodeValue.encode("cp1252")
32 nodeValue = unicode(bytes, "UTF-8")
34 element = doc.createElement(nodename)
# An empty value yields an empty element with no text child.
36 if( nodeValue != "" ):
37 textNode = doc.createTextNode(nodeValue)
38 element.appendChild(textNode)
# The element is attached to the parent whether or not it has text.
40 parent.appendChild(element)
# Reads the dumped entry file "<username>/L-<id>" and appends an <entry>
# element (itemid, eventtime, subject, event, allowmask, taglist, followed by
# the entry's comments) to `element` in the output document `outDoc`.
# NOTE(review): no close of `entryFile` is visible in this excerpt, and the
# parameter name `id` shadows the builtin.
43 def addEntryForId(outDoc, element, username, id):
44 entryFile = open("%s/L-%s" % (username,id), "r")
45 inDoc = xml.dom.minidom.parse(entryFile)
47 # Create an entry element
48 entry = outDoc.createElement("entry")
49 element.appendChild(entry)
51 # Create an itemid element
52 appendTextNode(outDoc, entry, "itemid", getNodeText(inDoc,"itemid"))
54 # Create an eventtime element
55 appendTextNode(outDoc, entry, "eventtime", getNodeText(inDoc, "eventtime"))
57 # Create a subject element
58 appendTextNode(outDoc, entry, "subject", getNodeText(inDoc, "subject"))
60 # Create an event node (special case because for some reason there are two
61 # 'event' elements in the pydump output, which is probably LJ's fault)
# The text is read from an "event" element looked up inside the first "event"
# element of the input document.
62 event = inDoc.getElementsByTagName("event")[0]
63 appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
65 # Create an allowmask element (doesn't exist in pydump output if public)
66 maskText = getNodeText(inDoc, "allowmask")
68 # XXXSMG: consult L-1411 and L-976 for examples of security and
# NOTE(review): the conditional choosing between the two calls below is not
# visible here; presumably the mask text is written when present and "0"
# otherwise. Confirm against the full file.
71 appendTextNode(outDoc, entry, "allowmask", maskText)
73 appendTextNode(outDoc, entry, "allowmask", "0")
75 # Create a taglist element
76 appendTextNode(outDoc, entry, "taglist", getNodeText(inDoc, "taglist"))
78 # XXXSMG: make sure there is a comment file before trying to do anything
# Comments are appended as children of the <entry> element just built.
80 addCommentsForId(outDoc, entry, username, id)
# Reads the dumped comment file "<username>/C-<id>" and appends one <comment>
# element per input comment to `entry`, using `outDoc` as the node factory.
# NOTE(review): the `try:` line, the body of the IOError handler, the `else:`
# of the date branch and the guards around the author and parent-id steps are
# not visible in this excerpt; no close of `commentFile` is visible either.
82 def addCommentsForId(outDoc, entry, username, id):
84 commentFile = open("%s/C-%s" % (username,id), "r")
85 except IOError: # there are no comments for this entry
88 inDoc = xml.dom.minidom.parse(commentFile)
90 comments = inDoc.getElementsByTagName("comment")
92 for comment in comments:
93 outComment = outDoc.createElement("comment")
94 entry.appendChild(outComment)
96 # add the item id for the comment
97 appendTextNode(outDoc, outComment, "itemid",
98 getNodeText(comment, "id"))
100 # convert the time string
# Input format is "%Y-%m-%dT%H:%M:%SZ"; output format is "%Y-%m-%d %H:%M:%S".
# The fields are reformatted only; no timezone conversion is applied here.
101 timeString = getNodeText(comment, "date")
102 if( timeString != "" ):
103 inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
104 outDate = strftime("%Y-%m-%d %H:%M:%S", inDate)
105 appendTextNode(outDoc, outComment, "eventtime", outDate)
# Presumably the else-branch (not visible): emit an empty <eventtime> element
# when the comment carries no date.
107 emptyTime = outDoc.createElement("eventtime")
108 outComment.appendChild(emptyTime)
110 # Create a subject element
111 appendTextNode(outDoc, outComment, "subject",
112 getNodeText(comment, "subject"))
114 # Create an event element
# The comment's "body" text becomes the output "event" text.
115 appendTextNode(outDoc, outComment, "event",
116 getNodeText(comment, "body"))
118 # Create the author element
119 author = outDoc.createElement("author")
120 outComment.appendChild(author)
123 cUser = getNodeText(comment, "user")
# The email is derived from the user name by appending "@livejournal.com".
# NOTE(review): any handling of an empty user name is not visible here.
127 appendTextNode(outDoc, author, "name", cUser)
128 appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")
130 # Create the parent_itemid
131 parentId = getNodeText(comment, "parentid")
133 appendTextNode(outDoc, outComment, "parent_itemid", parentId)
# Driver: lists the entry files in the user's directory and writes them out
# as XML documents holding up to `entryLimit` entries each.
# NOTE(review): the enclosing definition line and the initialisation of
# `username`, `entryArray`, `highNum` and `currentFileEntry` are not visible
# in this excerpt.
# NOTE(review): the check expects exactly two items in `argv` and reads the
# limit from argv[1]; that matches the usage text only if `argv` excludes the
# program name. Confirm against the caller.
140 if( len(argv) != 2 ):
141 print( "Usage: convertdump.py <username> <entrylimit>" )
145 entryLimit = int(argv[1])
147 userDir = os.listdir(username)
152 # get the list of entries
# Entry files are named "L-<number>"; the numeric part is collected.
# NOTE(review): the loop variable `file` shadows the Python 2 builtin.
154 if file.startswith("L-"):
155 entryNum = int(file.replace("L-",""))
157 entryArray.append(entryNum)
# Presumably tracks the largest entry number seen; the branch body is not visible.
159 if( highNum < entryNum ):
165 # Create the minidom document
166 outDoc = xml.dom.minidom.Document()
168 # Create the <livejournal> base element
169 ljElement = outDoc.createElement("livejournal")
170 outDoc.appendChild(ljElement)
174 # start processing entries
175 for entry in entryArray:
176 addEntryForId(outDoc, ljElement, username, entry)
178 currentFileEntry += 1
# Flush the current document once the per-file limit is reached or the last
# entry in the list has been processed.
180 if( currentFileEntry == entryLimit or entry == entryArray[-1] ):
# The output file is named after the user and the last entry id in this batch.
# NOTE(review): the write and close of `f` are not visible in this excerpt.
182 f = open("%s - %s.xml" % (username, entry), "w")
183 tempXML = outDoc.toxml("UTF-8")
# Start a fresh document for the next batch of entries.
188 # Create the minidom document
189 outDoc = xml.dom.minidom.Document()
191 # Create the <livejournal> base element
192 ljElement = outDoc.createElement("livejournal")
193 outDoc.appendChild(ljElement)
# Script entry guard: the guarded code runs only when the file is executed
# directly. NOTE(review): the guarded statement is not visible in this excerpt.
195 if __name__ == "__main__":