X-Git-Url: https://wagner.pp.ru/gitweb/?a=blobdiff_plain;f=ljdump.py;h=a347dc285e666ac5e5d8358cb5d8a6c3a92da17f;hb=a1ab8ec4b8c816003df57d3e57010eefaa8bb19b;hp=e2114b33b86dbd9234c3719f9962c4eb75324acd;hpb=58585cc8ce90b25a571782a1e4ccbce95a1eae25;p=oss%2Fljdump.git diff --git a/ljdump.py b/ljdump.py index e2114b3..a347dc2 100755 --- a/ljdump.py +++ b/ljdump.py @@ -2,7 +2,7 @@ # # ljdump.py - livejournal archiver # Greg Hewgill http://hewgill.com -# Version 1.2 +# Version 1.3.2 # # $Id$ # @@ -26,6 +26,9 @@ # This program may be run as often as needed to bring the backup copy up # to date. Both new and updated items are downloaded. # +# The community http://ljdump.livejournal.com has been set up for questions +# or comments. +# # LICENSE # # This software is provided 'as-is', without any express or implied @@ -44,11 +47,17 @@ # misrepresented as being the original software. # 3. This notice may not be removed or altered from any source distribution. # -# Copyright (c) 2005-2006 Greg Hewgill +# Copyright (c) 2005-2009 Greg Hewgill -import codecs, md5, os, pickle, pprint, re, sys, urllib2, xml.dom.minidom, xmlrpclib +import codecs, md5, os, pickle, pprint, re, shutil, sys, urllib2, xml.dom.minidom, xmlrpclib from xml.sax import saxutils +MimeExtensions = { + "image/gif": ".gif", + "image/jpeg": ".jpg", + "image/png": ".png", +} + def calcchallenge(challenge, password): return md5.new(challenge+md5.new(password).hexdigest()).hexdigest() @@ -100,6 +109,12 @@ def writedump(fn, event): dumpelement(f, "event", event) f.close() +def writelast(): + f = open("%s/.last" % Username, "w") + f.write("%s\n" % lastsync) + f.write("%s\n" % lastmaxid) + f.close() + def createxml(doc, name, map): e = doc.createElement(name) for k in map.keys(): @@ -184,13 +199,18 @@ while True: 'selecttype': "one", 'itemid': item['item'][2:], }, Password)) - writedump("%s/%s" % (Username, item['item']), e['events'][0]) - newentries += 1 + if e['events']: + writedump("%s/%s" % (Username, item['item']), e['events'][0]) + newentries += 1 + else: + print "Unexpected empty item: %s" % item['item'] + errors += 1 except xmlrpclib.Fault, x: print "Error getting item: %s" % item['item'] pprint.pprint(x) errors += 1 lastsync = item['time'] + writelast() # The following code doesn't work because the server rejects our repeated calls. # http://www.livejournal.com/doc/server/ljp.csp.xml-rpc.getevents.html @@ -256,11 +276,25 @@ f = open("%s/user.map" % Username, "w") pickle.dump(usermap, f) f.close() +print "Fetching userpics for: %s" % Username f = open("%s/userpics.xml" % Username, "w") print >>f, """""" print >>f, "" for p in userpics: print >>f, """""" % (p, userpics[p]) + pic = urllib2.urlopen(userpics[p]) + ext = MimeExtensions.get(pic.info()["Content-Type"], "") + picfn = re.sub(r"[\/]", "_", p) + try: + picfn = codecs.utf_8_decode(picfn)[0] + picf = open("%s/%s%s" % (Username, picfn, ext), "wb") + except: + # for installations where the above utf_8_decode doesn't work + picfn = "".join([ord(x) < 128 and x or "?" for x in picfn]) + picf = open("%s/%s%s" % (Username, picfn, ext), "wb") + shutil.copyfileobj(pic, picf) + pic.close() + picf.close() print >>f, "" f.close() @@ -307,10 +341,7 @@ while True: lastmaxid = maxid -f = open("%s/.last" % Username, "w") -f.write("%s\n" % lastsync) -f.write("%s\n" % lastmaxid) -f.close() +writelast() if origlastsync: print "%d new entries, %d new comments (since %s)" % (newentries, newcomments, origlastsync)