+
# Regular expressions used in replaceLJTags().
# They are compiled once at module level so they can be reused later
# (suggestion by jparise).

userRE = re.compile(r'<lj user="(.*?)" ?/?>', re.IGNORECASE)
commRE = re.compile(r'<lj comm="(.*?)" ?/?>', re.IGNORECASE)
namedCutRE = re.compile(r'<lj-cut +text="(.*?)" ?/?>',
                        re.IGNORECASE | re.DOTALL)
cutRE = re.compile(r'<lj-cut>', re.IGNORECASE)
# The closing tag needs its own name: rebinding cutRE here would throw away
# the opening-tag pattern above, so <lj-cut> would never be converted.
cutEndRE = re.compile(r'</lj-cut>', re.IGNORECASE)
embedRE = re.compile(r'<lj-embed id="[0-9]+">', re.IGNORECASE)


def replaceLJTags(entry):
    """Return *entry* with LiveJournal-specific tags rewritten.

    The substitutions are applied in this order:

    * ``<lj user="NAME">`` becomes a link to
      ``http://www.livejournal.com/users/NAME`` with class ``lj-user``.
    * ``<lj comm="NAME">`` becomes a link to
      ``http://community.livejournal.com/NAME/`` with class ``lj-comm``.
    * ``<lj-cut text="TEXT">`` becomes ``<!--more TEXT-->``.
    * ``<lj-cut>`` becomes ``<!--more-->``.
    * ``</lj-cut>`` is removed.

    All tag matching is case-insensitive. ``<lj-embed>`` tags are left
    untouched (see the note in the body).

    Args:
        entry: The entry text to convert.

    Returns:
        A new string with the tags replaced; text without any of these
        tags is returned unchanged.
    """
    rv = entry

    # replace lj user tags
    rv = userRE.sub(
        r'<a href="http://www.livejournal.com/users/\1" class="lj-user">\1</a>',
        rv)

    # replace lj comm tags
    rv = commRE.sub(
        r'<a href="http://community.livejournal.com/\1/" class="lj-comm">\1</a>',
        rv)

    # replace lj-cut tags: named cuts first, then plain opening tags, and
    # finally strip the closing tags, which have no counterpart in the output
    rv = namedCutRE.sub(r'<!--more \1-->', rv)
    rv = cutRE.sub('<!--more-->', rv)
    rv = cutEndRE.sub('', rv)

    # replace lj-embed tags
    # this doesn't actually work. LJ doesn't include the embedded content
    # when ljdump calls 'getevents', but instead includes an lj-embed tag
    # with an id and nothing else.
    #rv = re.sub(embedRE, '', rv)

    return rv
+
+
def usage():
    """Print the command-line help for convertdump.py to standard output."""
    headline = "Usage: convertdump.py [arguments]"
    # The body starts and ends with a newline, so it is framed by blank
    # lines when printed after the headline.
    body = """
This will convert a pydump archive into something compatible with the
WordPress LiveJournal importer. This is the same format used by the Windows
ljArchive exporter.

Arguments:
 -u --user username of archive to process [required]
 -l --limit limit the number of entries in each xml file (default 250)
 -i --insecure include private and protected entries in the output
 -h --help show this help page

Example:
 ./convertdump.py --user stevemartin --limit 200 --insecure
"""
    for section in (headline, body):
        print(section)
+
+