        finally:
            # Always release the HTTP response opened above; guard the close
            # because on some failure paths the bound object is not file-like.
            try:
                r.close()
            except AttributeError: # r is sometimes a dict for unknown reasons
                pass
        # Cache per-comment metadata (poster id and moderation state) keyed by
        # numeric comment id, tracking the highest id seen so far.
        for c in meta.getElementsByTagName("comment"):
            id = int(c.getAttribute("id"))
            metacache[id] = {
                'posterid': c.getAttribute("posterid"),
                'state': c.getAttribute("state"),
            }
            if id > maxid:
                maxid = id
        # Record numeric posterid -> username mappings for later attribution.
        for u in meta.getElementsByTagName("usermap"):
            usermap[u.getAttribute("id")] = u.getAttribute("user")
        # Stop once metadata has been fetched up to the server-reported maxid.
        if maxid >= int(meta.getElementsByTagName("maxid")[0].firstChild.nodeValue):
            break
+
+ f = open("%s/comment.meta" % Journal, "w")
+ pickle.dump(metacache, f)
+ f.close()
+
+ f = open("%s/user.map" % Journal, "w")
+ pickle.dump(usermap, f)
+ f.close()
+
    # Second pass: fetch comment bodies in batches, resuming after the last
    # comment id saved by a previous run (lastmaxid) and stopping at the
    # newest id discovered during the metadata pass (newmaxid).
    newmaxid = maxid
    maxid = lastmaxid
    while True:
        try:
            try:
                r = urllib2.urlopen(urllib2.Request(Server+"/export_comments.bml?get=comment_body&startid=%d%s" % (maxid+1, authas), headers = {'Cookie': "ljsession="+ljsession}))
                meta = xml.dom.minidom.parse(r)
            except:
                # Deliberate best-effort: comment export requires maintainer
                # rights on communities, so failures here just end the pass.
                print "*** Error fetching comment body, possibly not community maintainer?"
                break
        finally:
            r.close()
        for c in meta.getElementsByTagName("comment"):
            id = int(c.getAttribute("id"))
            jitemid = c.getAttribute("jitemid")
            comment = {
                'id': str(id),
                'parentid': c.getAttribute("parentid"),
                'subject': gettext(c.getElementsByTagName("subject")),
                'date': gettext(c.getElementsByTagName("date")),
                'body': gettext(c.getElementsByTagName("body")),
                'state': metacache[id]['state'],
            }
            # Attach the poster's username when known; anonymous comments
            # have no posterid mapping in usermap.
            if usermap.has_key(c.getAttribute("posterid")):
                comment["user"] = usermap[c.getAttribute("posterid")]
            # Load the existing per-entry comment file, or start a fresh
            # <comments> document if this entry has none yet.
            try:
                entry = xml.dom.minidom.parse("%s/C-%s" % (Journal, jitemid))
            except:
                entry = xml.dom.minidom.getDOMImplementation().createDocument(None, "comments", None)
            # Skip comments already present in the file — batches can
            # overlap with data saved by a previous run.
            found = False
            for d in entry.getElementsByTagName("comment"):
                if int(d.getElementsByTagName("id")[0].firstChild.nodeValue) == id:
                    found = True
                    break
            if found:
                print "Warning: downloaded duplicate comment id %d in jitemid %s" % (id, jitemid)
            else:
                entry.documentElement.appendChild(createxml(entry, "comment", comment))
                f = codecs.open("%s/C-%s" % (Journal, jitemid), "w", "UTF-8")
                entry.writexml(f)
                f.close()
                newcomments += 1
            if id > maxid:
                maxid = id
        if maxid >= newmaxid:
            break

    lastmaxid = maxid

    # Persist sync state so the next run resumes where this one stopped.
    writelast(Journal, lastsync, lastmaxid)
+
+ if Username == Journal:
+ print "Fetching userpics for: %s" % Username
+ f = open("%s/userpics.xml" % Username, "w")
+ print >>f, """<?xml version="1.0"?>"""
+ print >>f, "<userpics>"
+ for p in userpics:
+ print >>f, """<userpic keyword="%s" url="%s" />""" % (p, userpics[p])
+ pic = urllib2.urlopen(userpics[p])
+ ext = MimeExtensions.get(pic.info()["Content-Type"], "")
+ picfn = re.sub(r'[*?\\/:<>"|]', "_", p)
+ try:
+ picfn = codecs.utf_8_decode(picfn)[0]
+ picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
+ except:
+ # for installations where the above utf_8_decode doesn't work
+ picfn = "".join([ord(x) < 128 and x or "_" for x in picfn])
+ picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
+ shutil.copyfileobj(pic, picf)
+ pic.close()
+ picf.close()
+ print >>f, "</userpics>"
+ f.close()
+
+ if origlastsync:
+ print "%d new entries, %d new comments (since %s)" % (newentries, newcomments, origlastsync)
+ else:
+ print "%d new entries, %d new comments" % (newentries, newcomments)
+ if errors > 0:
+ print "%d errors" % errors
+
if __name__ == "__main__":
    # Prefer credentials from a saved ljdump.config; otherwise prompt.
    if os.access("ljdump.config", os.F_OK):
        config = xml.dom.minidom.parse("ljdump.config")
        server = config.documentElement.getElementsByTagName("server")[0].childNodes[0].data
        username = config.documentElement.getElementsByTagName("username")[0].childNodes[0].data
        password = config.documentElement.getElementsByTagName("password")[0].childNodes[0].data
        # Zero or more <journal> elements select which journals to back up;
        # with none present, back up the user's own journal.
        journals = config.documentElement.getElementsByTagName("journal")
        if journals:
            for e in journals:
                ljdump(server, username, password, e.childNodes[0].data)
        else:
            ljdump(server, username, password, username)
    else:
        from getpass import getpass
        print "ljdump - livejournal archiver"
        print
        print "Enter your Livejournal username and password."
        print
        server = "http://livejournal.com"
        username = raw_input("Username: ")
        password = getpass("Password: ")
        print
        print "You may back up either your own journal, or a community."
        print "If you are a community maintainer, you can back up both entries and comments."
        print "If you are not a maintainer, you can back up only entries."
        print
        journal = raw_input("Journal to back up (or hit return to back up '%s'): " % username)
        print
        if journal:
            ljdump(server, username, password, journal)
        # NOTE(review): the prompt implies that hitting return backs up the
        # user's own journal, but no else-branch is visible in this chunk —
        # confirm an "else: ljdump(server, username, password, username)"
        # follows; otherwise an empty answer silently does nothing.