3 # Copyright 2009, Sean M. Graham (www.sean-graham.com)
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
10 # - Redistributions of source code must retain the above copyright notice,
11 # this list of conditions and the following disclaimer.
13 # - Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
20 # EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
23 # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
26 # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 import xml.dom.minidom
33 from time import strptime, strftime
def getNodeText(doc, nodename):
    """Return the text held directly by the first <nodename> element in doc.

    doc is any minidom node that offers getElementsByTagName (a Document
    or an Element).  Only the TEXT_NODE children of the first match are
    concatenated; text nested inside child elements is not included.

    Returns "" when no such element exists or when it holds no text, so
    callers can treat "missing" and "empty" the same way.
    """
    matches = doc.getElementsByTagName(nodename)
    if not matches:
        # Optional elements are simply absent from the input; report them
        # as empty instead of failing on the [0] lookup.
        return ""

    return "".join(node.data for node in matches[0].childNodes
                   if node.nodeType == node.TEXT_NODE)
def appendTextNode(doc, parent, nodename, value):
    """Append a <nodename> element containing value to parent.

    doc is the minidom Document used to create the nodes and parent is
    the node that receives the new element.  An empty (or None) value
    produces an empty element with no text child.  Returns nothing.

    Byte strings are decoded to text before use: UTF-8 is tried first and
    cp1252 is the fallback for data that is not valid UTF-8.
    """
    nodeValue = value

    # make sure value is properly encoded: the text node should hold text,
    # so raw bytes are decoded here.
    if isinstance(nodeValue, bytes):
        try:
            nodeValue = nodeValue.decode("UTF-8")
        except UnicodeDecodeError:
            # cp1252 leaves a few byte values unassigned; "replace" keeps a
            # single stray byte from aborting the whole conversion.
            nodeValue = nodeValue.decode("cp1252", "replace")

    element = doc.createElement(nodename)

    if nodeValue:
        element.appendChild(doc.createTextNode(nodeValue))

    parent.appendChild(element)
def addEntryForId(outDoc, element, username, id):
    """Convert the dumped entry L-<id> into an <entry> child of element.

    Reads "<username>/L-<id>", appends a new <entry> element (created
    through outDoc) to element, and copies the entry's itemid, eventtime,
    subject, event, allowmask and taglist into it.  The entry's comments
    are then appended by addCommentsForId.

    Raises IOError if the entry file cannot be opened and IndexError if
    the file contains no <event> element.
    """
    # Binary mode lets the XML parser apply the file's own encoding
    # declaration; the with-block closes the handle once parsing is done.
    with open("%s/L-%s" % (username, id), "rb") as entryFile:
        inDoc = xml.dom.minidom.parse(entryFile)

    # Create an entry element
    entry = outDoc.createElement("entry")
    element.appendChild(entry)

    # Create the itemid, eventtime and subject elements (copied as-is)
    for field in ("itemid", "eventtime", "subject"):
        appendTextNode(outDoc, entry, field, getNodeText(inDoc, field))

    # Create an event node (special case because for some reason there are two
    # 'event' elements in the pydump output, which is probably LJ's fault)
    event = inDoc.getElementsByTagName("event")[0]
    appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))

    # Create an allowmask element (doesn't exist in pydump output if public),
    # writing "0" when the entry carries no mask.
    # XXXSMG: consult L-1411 and L-976 for examples of security and
    # allowmask data
    maskText = getNodeText(inDoc, "allowmask")
    appendTextNode(outDoc, entry, "allowmask", maskText or "0")

    # Create a taglist element
    appendTextNode(outDoc, entry, "taglist", getNodeText(inDoc, "taglist"))

    # Entries without a comment file are handled inside addCommentsForId.
    addCommentsForId(outDoc, entry, username, id)
def addCommentsForId(outDoc, entry, username, id):
    """Append the comments dumped for entry <id> to entry.

    Reads "<username>/C-<id>".  When that file cannot be opened the entry
    is taken to have no comments and nothing is added.  Every <comment>
    element in the file becomes a <comment> child of entry (created
    through outDoc) holding itemid, eventtime, subject, event, an author
    element (name and email) and, when the comment has a parent id,
    parent_itemid.

    Raises ValueError if a comment date is present but does not match
    the "%Y-%m-%dT%H:%M:%SZ" layout.
    """
    try:
        # Binary mode lets the XML parser apply the file's own encoding
        # declaration.
        commentFile = open("%s/C-%s" % (username, id), "rb")
    except IOError:  # there are no comments for this entry
        return

    # Close the handle as soon as the document has been parsed.
    with commentFile:
        inDoc = xml.dom.minidom.parse(commentFile)

    for comment in inDoc.getElementsByTagName("comment"):
        outComment = outDoc.createElement("comment")
        entry.appendChild(outComment)

        # add the item id for the comment
        appendTextNode(outDoc, outComment, "itemid",
                       getNodeText(comment, "id"))

        # convert the time string; a missing date is passed through as ""
        # and therefore yields an empty <eventtime> element
        timeString = getNodeText(comment, "date")
        if timeString:
            inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
            timeString = strftime("%Y-%m-%d %H:%M:%S", inDate)
        appendTextNode(outDoc, outComment, "eventtime", timeString)

        # Create a subject element
        appendTextNode(outDoc, outComment, "subject",
                       getNodeText(comment, "subject"))

        # Create an event element
        appendTextNode(outDoc, outComment, "event",
                       getNodeText(comment, "body"))

        # Create the author element
        author = outDoc.createElement("author")
        outComment.appendChild(author)

        # A comment without a <user> element has no author name; label it
        # "anonymous" rather than emitting an empty name and a bare
        # "@livejournal.com" address.
        cUser = getNodeText(comment, "user") or "anonymous"

        appendTextNode(outDoc, author, "name", cUser)
        appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")

        # Create the parent_itemid (only when the comment has a parent id)
        parentId = getNodeText(comment, "parentid")
        if parentId:
            appendTextNode(outDoc, outComment, "parent_itemid", parentId)
def _newJournalDocument():
    """Return (document, root) for a new document with an empty <livejournal> root."""
    outDoc = xml.dom.minidom.Document()
    ljElement = outDoc.createElement("livejournal")
    outDoc.appendChild(ljElement)
    return outDoc, ljElement


def _listEntryIds(username):
    """Return the sorted numeric ids of the "L-<id>" files in directory username."""
    entryIds = []
    for name in os.listdir(username):
        suffix = name[2:]
        # Only names of the exact form "L-<digits>" are entries; skipping
        # everything else keeps int() from failing on stray files.
        if name.startswith("L-") and suffix.isdigit():
            entryIds.append(int(suffix))

    # os.listdir returns names in arbitrary order; sort so that the output
    # files are filled in ascending id order.
    entryIds.sort()
    return entryIds


def main(argv):
    """Convert the dump in directory <username> into "<username> - <id>.xml" files.

    argv is [username, entrylimit].  Entries are processed in ascending
    id order.  Whenever entrylimit entries have been collected, and again
    at the last entry, the current <livejournal> document is written as
    UTF-8 to "<username> - <id of the last entry it contains>.xml" and a
    new document is started.  A limit that is never reached (including
    zero or a negative number) puts every entry into a single file.

    Prints a usage line and returns without doing anything when argv does
    not hold exactly two items or the limit is not an integer.
    """
    if len(argv) != 2:
        print( "Usage: convertdump.py <username> <entrylimit>" )
        return

    username = argv[0]
    try:
        entryLimit = int(argv[1])
    except ValueError:
        print( "Usage: convertdump.py <username> <entrylimit>" )
        return

    # get the list of entries
    entryArray = _listEntryIds(username)

    # Create the minidom document and its <livejournal> base element
    outDoc, ljElement = _newJournalDocument()
    currentFileEntry = 0

    # start processing entries
    for entry in entryArray:
        addEntryForId(outDoc, ljElement, username, entry)

        currentFileEntry += 1

        if currentFileEntry == entryLimit or entry == entryArray[-1]:
            # toxml("UTF-8") yields encoded bytes, so the file is opened in
            # binary mode; the with-block closes it after the write.
            with open("%s - %s.xml" % (username, entry), "wb") as f:
                f.write(outDoc.toxml("UTF-8"))

            # Start a new, empty document for the next batch of entries
            currentFileEntry = 0
            outDoc, ljElement = _newJournalDocument()


if __name__ == "__main__":
    # NOTE(review): assumes main receives the arguments after the script
    # name, which is what its two-item check implies -- confirm.
    import sys

    main(sys.argv[1:])