2011-04-12 07:15:52 +03:00
|
|
|
|
# Encoding: UTF-8
|
2011-04-20 03:35:26 +03:00
|
|
|
|
"""Rewrite markdown links from [Label]{category:thing} to just {category:thing}
|
2011-04-12 07:15:52 +03:00
|
|
|
|
|
2011-04-20 03:35:26 +03:00
|
|
|
|
There was a version of this script that rewrote stuff from an even earlier
|
|
|
|
|
format. Git log should find it without problems.
|
|
|
|
|
|
|
|
|
|
This is an unmaintained one-shot script, only included in the repo for
|
|
|
|
|
reference.
|
2011-04-12 07:15:52 +03:00
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from functools import partial
|
|
|
|
|
import sys
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.orm.exc import MultipleResultsFound
|
|
|
|
|
from sqlalchemy.sql.expression import func
|
|
|
|
|
|
2011-04-20 03:35:26 +03:00
|
|
|
|
from pokedex.db import connect, tables, util
|
2011-04-12 07:15:52 +03:00
|
|
|
|
|
|
|
|
|
# Anchored character class listing letters, digits, punctuation and a few
# non-ASCII symbols.
# NOTE(review): there is no quantifier after the class, so as written this
# only matches a string that is exactly one such character, and the name is
# not referenced anywhere else in the visible code.  Presumably meant as a
# whitelist of characters allowed in the prose -- confirm whether a trailing
# `*` was intended before relying on it.
sanity_re = re.compile(ur"^[-A-Za-z0-9 é\[\]{}.%':;,×/()\"|–`—!*♂♀\\]$")
|
|
|
|
|
|
|
|
|
|
# RE that matches anything that might look like a link: an opening bracket,
# some non-"]" text, an optional closing bracket (so a link whose "]" is
# missing is still caught), then a non-empty {...} part.
# It has no groups, so findall() returns the whole matched substrings.
fuzzy_link_re = re.compile(r"""
    \[
        [^]]+
    \]?
    \{
        [^}]+
    \}""", re.VERBOSE)
|
|
|
|
|
|
2011-04-20 03:35:26 +03:00
|
|
|
|
# Very specific RE that matches links that appear in source Markdown strings.
# Groups:
#   label    -- up to 30 letters, digits, spaces, "-", "'" or "." (may be empty)
#   category -- up to 20 lowercase letters (may be empty)
#   target   -- up to 40 lowercase letters, digits, spaces or "-" (may be empty)
strict_link_re = re.compile(r"""
    \[
        (?P<label>
            [-A-Za-z 0-9'.]{,30}
        )
    \]
    \{
        (?P<category>
            [a-z]{,20}
        )
        :
        (?P<target>
            [-a-z 0-9]{,40}
        )
    \}
    """, re.VERBOSE)
|
|
|
|
|
|
|
|
|
|
# Format of the resulting links: the whole string must be one
# [label]{category:target} link, where the label may be empty but the
# category (lowercase letters) and target (lowercase letters, digits, "-")
# may not.
result_link_re = re.compile(r"""
    ^
    \[
        (?P<label>
            [^]]*
        )
    \]
    \{
        (?P<category>
            [a-z]+
        )
        :
        (?P<target>
            [-a-z0-9]+
        )
    \}
    $
    """, re.VERBOSE)
|
|
|
|
|
|
|
|
|
|
# Value of `local_language_id` that main() processes; translation rows with
# any other language id are skipped.
# NOTE(review): presumably the id of English in the languages table --
# confirm against the database.
english_id = 9
|
|
|
|
|
|
2011-04-20 03:35:26 +03:00
|
|
|
|
# Hand-written rewrites, keyed by the complete old-style link text.  They are
# looked up before any automatic rewriting, and their values are free-form
# prose rather than a single [label]{category:target} link.
manual_replacements = {
    '[Pewter Museum of Science]{location:pewter-city}':
        'the Museum of Science in {location:pewter-city}',
    '[Oreburgh Mining Museum]{location:mining-museum}':
        '{location:mining-museum} in {location:oreburgh-city}',
}
|
|
|
|
|
|
2011-04-12 07:15:52 +03:00
|
|
|
|
def is_md_col(column):
    """Tell whether a table column is marked as holding Markdown text.

    `column` must have an `info` mapping; the column counts as Markdown when
    that mapping's 'format' entry equals 'markdown'.  A missing 'format'
    entry yields False.
    """
    return column.info.get('format') == 'markdown'
|
|
|
|
|
|
2011-04-20 03:35:26 +03:00
|
|
|
|
# Link category -> name of the mapped class in `tables` whose rows the
# category refers to.  Names are resolved with getattr() at lookup time so
# only the class that is actually needed gets touched.
_CATEGORY_TABLE_NAMES = {
    'item': 'Item',
    'ability': 'Ability',
    'move': 'Move',
    'type': 'Type',
    'pokemon': 'Pokemon',
    'location': 'Location',
}


def _report_link(entire_text, matched_link):
    """Print the full source text and the offending link before an error."""
    print('')
    print(repr(entire_text))
    print(repr(matched_link))


def get_replacement(session, entire_text, context, matchobj):
    """Return the replacement text for one old-style link.

    Intended as the callback for `strict_link_re.sub()` (with the first three
    arguments bound in advance).

    session     -- database session handed to `util.get` for lookups
    entire_text -- the complete text being rewritten; only printed when
                   something goes wrong
    context     -- short description of where the text comes from; printed
                   when a label is kept because it differs from the name
    matchobj    -- match with `label`, `category` and `target` groups

    Returns the entry from `manual_replacements` when the whole link has one.
    Otherwise returns "[]{category:target}" if the label equals the looked-up
    name (case-insensitively), or "[label]{category:target}" if it does not.

    Raises ValueError for a category that is neither 'mechanic' nor one of
    the known table categories, re-raises whatever `util.get` raises for an
    unknown target, and raises AssertionError if the generated link does not
    match `result_link_re`.
    """
    label = matchobj.group('label')
    category = matchobj.group('category')
    # An empty target means the label doubles as the target.
    target = matchobj.group('target') or label
    matched_link = matchobj.group(0)

    # Manual rewrites win over everything else.  They do not have the
    # [label]{category:target} shape, so they bypass the format check below.
    try:
        return manual_replacements[matched_link]
    except KeyError:
        pass

    if category == 'mechanic':
        # Mechanics are not looked up; only normalise the target and leave
        # the expected label empty.
        target = target.lower().replace(' ', '-')
        wanted_label = ''
    else:
        table_name = _CATEGORY_TABLE_NAMES.get(category)
        if table_name is None:
            _report_link(entire_text, matched_link)
            raise ValueError('Category %s not implemented' % category)
        table = getattr(tables, table_name)
        try:
            wanted_label = util.get(session, table, target).name
        except Exception:
            # Show which text and link caused the failed lookup, then let
            # the original error propagate.
            _report_link(entire_text, matched_link)
            raise

    if wanted_label.lower() == label.lower():
        # The label only repeats the name, so it can be dropped.
        result = "[]{%s:%s}" % (category, target)
    else:
        result = "[%s]{%s:%s}" % (label, category, target)
        if wanted_label:
            # Report custom labels that are being kept.
            print('')
            print(context)
            print('%-40s %s != %s' % (matched_link, label, wanted_label))

    assert result_link_re.match(result), result
    return result
|
|
|
|
|
|
|
|
|
|
def _rewrite_column(session, translation_class, row, column):
    """Rewrite the links in one Markdown column of one row, in place."""
    markdown = getattr(row, column.name)
    if not markdown:
        return
    text = unicode(markdown)

    # Make sure everything that remotely looks like a link is one
    links = fuzzy_link_re.findall(text)
    if not links:
        return
    for link in links:
        assert strict_link_re.findall(link), (
            strict_link_re.findall(link), [link])

    # Do the replacement
    context = '%s %s %s' % (
        translation_class.__name__, row.foreign_id, column.name)
    replaced = strict_link_re.sub(
        partial(get_replacement, session, text, context),
        text,
    )
    setattr(row, column.name, replaced)


def main(argv):
    """Rewrite the links in every English Markdown text in the database.

    Walks the translation classes of every class in `tables.mapped_classes`,
    and for each row whose `local_language_id` equals `english_id` rewrites
    every column that `is_md_col` accepts.

    argv -- command-line arguments without the program name.  The changes
            are committed only when the first argument is '--commit';
            otherwise a hint is printed and nothing is committed.
    """
    session = connect()
    for cls in tables.mapped_classes:
        for translation_class in cls.translation_classes:
            md_columns = [
                column for column in translation_class.__table__.c
                if is_md_col(column)
            ]
            if not md_columns:
                continue
            for row in session.query(translation_class):
                if row.local_language_id != english_id:
                    continue
                for column in md_columns:
                    _rewrite_column(session, translation_class, row, column)

    if argv and argv[0] == '--commit':
        session.commit()
        print('Committed')
    else:
        print('Run with --commit to commit changes')
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Hand over only the user-supplied arguments; main() checks the first
    # one for '--commit'.
    main(sys.argv[1:])
|