X-Git-Url: https://wagner.pp.ru/gitweb/?a=blobdiff_plain;f=convertdump.py;h=0f59e8282d6dc9e2e9fa481a1312201b6468c2bf;hb=HEAD;hp=f098a58f2a2e0b29fd0fbaca86ceb3a5a7e75509;hpb=cde42b8180a56fb57c1c4e3e3ee4b9ae66707f0e;p=oss%2Fljdump.git diff --git a/convertdump.py b/convertdump.py index f098a58..0f59e82 100755 --- a/convertdump.py +++ b/convertdump.py @@ -30,6 +30,7 @@ import os import codecs import sys import getopt +import re from time import strptime, strftime @@ -85,7 +86,9 @@ def addEntryForId(outDoc, element, username, id, includeSecure): # Create an event node (special case because for some reason there are two # 'event' elements in the pydump output, which is probably LJ's fault) event = inDoc.getElementsByTagName("event")[0] - appendTextNode(outDoc, entry, "event", getNodeText(event, "event")) + eventText = getNodeText(event, "event") + + appendTextNode(outDoc, entry, "event", replaceLJTags(eventText)) security = getNodeText(inDoc, "security") @@ -152,8 +155,8 @@ def addCommentsForId(outDoc, entry, username, id): getNodeText(comment, "subject")) # Create an event element - appendTextNode(outDoc, outComment, "event", - getNodeText(comment, "body")) + bodyText = getNodeText(comment, "body") + appendTextNode(outDoc, outComment, "event", replaceLJTags(bodyText)) # Create the author element author = outDoc.createElement("author") @@ -172,6 +175,41 @@ def addCommentsForId(outDoc, entry, username, id): if(parentId != ""): appendTextNode(outDoc, outComment, "parent_itemid", parentId) + +# regular expressions used in replaceLJTags() +# (global for later reuse - suggestion by jparise) + +userRE = re.compile('', re.IGNORECASE) +commRE = re.compile('', re.IGNORECASE) +namedCutRE = re.compile('', + re.IGNORECASE|re.DOTALL) +cutRE = re.compile('', re.IGNORECASE) +cutRE = re.compile('', re.IGNORECASE) +embedRE = re.compile('', re.IGNORECASE) + +def replaceLJTags(entry): + rv = entry + + # replace lj user tags + rv = re.sub(userRE, '\\1', rv) + + # replace lj comm tags + rv = re.sub(commRE, '\\1', rv) + + # replace lj-cut tags + rv = re.sub(namedCutRE, '', rv) + rv = re.sub(cutRE, '', rv) + rv = re.sub(cutRE, '', rv) + + # replace lj-embed tags + # this doesn't actually work. LJ doesn't include the embedded content + # when ljdump calls 'getevents', but instead includes an lj-embed tag + # with an id and nothing else. + #rv = re.sub(embedRE, '', rv) + + return rv + + def usage(): print( "Usage: convertdump.py [arguments]" ) print( """