#
# ljdump.py - livejournal archiver
# Greg Hewgill <greg@hewgill.com> http://hewgill.com
# Version 1.5.1
#
# LICENSE
#
# misrepresented as being the original software.
# 3. This notice may not be removed or altered from any source distribution.
#
# Copyright (c) 2005-2010 Greg Hewgill and contributors
# Resolved leftover patch markers: the old import line pulled in the
# deprecated md5 module, which is replaced below by a hashlib shim.
import codecs, os, pickle, pprint, re, shutil, sys, urllib2, xml.dom.minidom, xmlrpclib
import time
from xml.sax import saxutils
# Maps a downloaded userpic's MIME type to the file extension used when
# saving the image to disk.
MimeExtensions = {
    "image/png": ".png",
}
# Compatibility shim: Python 2.5+ exposes MD5 through hashlib; older
# interpreters only have the deprecated md5 module. Normalize both to a
# callable named `md5` that returns a fresh hash object.
try:
    from hashlib import md5
except ImportError:
    import md5 as _md5
    md5 = _md5.new
def calcchallenge(challenge, password):
    # LiveJournal challenge-response authentication: the response value is
    # md5(challenge + md5(password).hexdigest()).hexdigest(), so the plain
    # password never crosses the wire. `md5` is the hashlib-compatible
    # callable set up at the top of the file.
    return md5(challenge+md5(password).hexdigest()).hexdigest()
def flatresponse(response):
r = {}
'getpickwurls': 1,
}, Password))
userpics = dict(zip(map(str, r['pickws']), r['pickwurls']))
- userpics['*'] = r['defaultpicurl']
+ if r['defaultpicurl']:
+ userpics['*'] = r['defaultpicurl']
while True:
+ time.sleep(0.2)
r = server.LJ.XMLRPC.syncitems(dochallenge(server, {
'username': Username,
'ver': 1,
if item['item'][0] == 'L':
print "Fetching journal entry %s (%s)" % (item['item'], item['action'])
try:
+ time.sleep(0.2)
e = server.LJ.XMLRPC.getevents(dochallenge(server, {
'username': Username,
'ver': 1,
print "Error getting item: %s" % item['item']
pprint.pprint(x)
errors += 1
+ if str(x).find("will be able to continue posting within an hour."):
+ print "Waiting a hour"
+ time.sleep(3600)
+ continue
lastsync = item['time']
writelast(Journal, lastsync, lastmaxid)
maxid = lastmaxid
while True:
try:
- try:
+ try:
+ time.sleep(0.2)
r = urllib2.urlopen(urllib2.Request(Server+"/export_comments.bml?get=comment_meta&startid=%d%s" % (maxid+1, authas), headers = {'Cookie': "ljsession="+ljsession}))
meta = xml.dom.minidom.parse(r)
- except:
+ except Exception, x:
print "*** Error fetching comment meta, possibly not community maintainer?"
- break
+ print "***", x
+ maxid += 200
+ continue
finally:
- r.close()
+ try:
+ r.close()
+ except AttributeError: # r is sometimes a dict for unknown reasons
+ pass
+ nxid=meta.getElementsByTagName("nextid")
+ if len(nxid):
+ nxid = nxid[0].firstChild.nodeValue
+ else:
+ nxid = None
+ print "Got meta data maxid = %d nextid=%s"%(
+ int(meta.getElementsByTagName("maxid")[0].firstChild.nodeValue),
+ nxid
+ )
for c in meta.getElementsByTagName("comment"):
id = int(c.getAttribute("id"))
metacache[id] = {
try:
r = urllib2.urlopen(urllib2.Request(Server+"/export_comments.bml?get=comment_body&startid=%d%s" % (maxid+1, authas), headers = {'Cookie': "ljsession="+ljsession}))
meta = xml.dom.minidom.parse(r)
- except:
+ except Exception, x:
print "*** Error fetching comment body, possibly not community maintainer?"
- break
+ print "*** requested id %d "%(maxid+1)
+ maxid+=1
+ print "***", x
+ continue
finally:
r.close()
for c in meta.getElementsByTagName("comment"):
ljdump(server, username, password, journal)
else:
ljdump(server, username, password, username)
# vim:ts=4 et: