1
0
Fork 0
mirror of https://github.com/veekun/pokedex.git synced 2024-08-20 18:16:34 +00:00

Fix missing whitespace

This commit is contained in:
qvalador 2018-08-23 01:43:34 -04:00
parent d70e95cb42
commit 9e492b5f1d
2 changed files with 39 additions and 2 deletions

View file

@ -40,6 +40,7 @@ def _get_verbose_prints(verbose):
"""If `verbose` is true, returns three functions: one for printing a
starting message, one for printing an interim status update, and one for
printing a success or failure message when finished.
If `verbose` is false, returns no-op functions.
"""
@ -100,23 +101,32 @@ def _get_verbose_prints(verbose):
def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=True, langs=None):
"""Load data from CSV files into the given database session.
Tables are created automatically.
`session`
SQLAlchemy session to use.
`tables`
List of tables to load. If omitted, all tables are loaded.
`directory`
Directory the CSV files reside in. Defaults to the `pokedex` data
directory.
`drop_tables`
If set to True, existing `pokedex`-related tables will be dropped.
`verbose`
If set to True, status messages will be printed to stdout.
`safe`
If set to False, load can be faster, but can corrupt the database if
it crashes or is interrupted.
`recursive`
If set to True, load all dependent tables too.
`langs`
List of identifiers of extra languages to load, or None to load them all.
"""
@ -363,15 +373,20 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
def dump(session, tables=[], directory=None, verbose=False, langs=None):
"""Dumps the contents of a database to a set of CSV files. Probably not
useful to anyone besides a developer.
`session`
SQLAlchemy session to use.
`tables`
List of tables to dump. If omitted, all tables are dumped.
`directory`
Directory the CSV files should be put in. Defaults to the `pokedex`
data directory.
`verbose`
If set to True, status messages will be printed to stdout.
`langs`
List of identifiers of languages to dump unofficial texts for
"""
@ -455,4 +470,3 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
writer.writerow(csvs)
print_done()

View file

@ -1,18 +1,23 @@
#! /usr/bin/env python
u"""General handling of translations
The general idea is to get messages from somewhere: the source pokedex CSVs,
or the translation CSVs, etc., then merge them together in some way, and shove
them into the database.
If a message is translated, it has a source string attached to it, with the
original English version. Or at least it has a CRC of the original.
When that doesn't match, it means the English string changed and the
translation has to be updated.
Also this is why we can't dump translations from the database: there's no
original string info.
Some complications:
Flavor text is so repetitive that we take strings from all the versions,
separate the unique ones by blank lines, let translators work on that, and then
put it in flavor_summary tables.
Route names and other repetitive numeric things are replaced by e.g.
"Route {num}" so translators only have to work on each set once.
"""
@ -68,10 +73,12 @@ def crc(string):
class Message(object):
"""Holds all info about a translatable or translated string
cls: Name of the mapped class the message belongs to
id: The id of the thing the message belongs to
colname: name of the database column
strings: A list of strings in the message, usually of length 1.
Optional attributes (None if not set):
colsize: Max length of the database column
source: The string this was translated from
@ -213,7 +220,9 @@ class Translations(object):
def write_translations(self, lang, *streams):
"""Write a translation CSV containing messages from streams.
Streams should be ordered by priority, from highest to lowest.
Any official translations (from the main database) are added automatically.
"""
writer = self.writer_for_lang(lang)
@ -265,6 +274,7 @@ class Translations(object):
def yield_source_messages(self, language_id=None):
"""Yield all messages from source CSV files
Messages from all languages are returned. The messages are not ordered
properly, but splitting the stream by language (and filtering results
by merge_adjacent) will produce proper streams.
@ -317,6 +327,7 @@ class Translations(object):
def get_load_data(self, langs=None):
"""Yield (translation_class, data for INSERT) pairs for loading into the DB
langs is either a list of language identifiers or None
"""
if langs is None:
@ -362,6 +373,7 @@ class Translations(object):
def group_by_object(stream):
"""Group stream by object
Yields ((class name, object ID), (list of messages)) pairs.
"""
stream = iter(stream)
@ -379,6 +391,7 @@ def group_by_object(stream):
class Merge(object):
"""Merge several sorted iterators together
Additional iterators may be added at any time with add_iterator.
Accepts None for the initial iterators
If the same value appears in more iterators, there will be duplicates in
@ -436,10 +449,13 @@ def merge_adjacent(gen):
def leftjoin(left_stream, right_stream, key=lambda x: x, unused=None):
"""A "left join" operation on sorted iterators
Yields (left, right) pairs, where left comes from left_stream and right
is the corresponding item from right_stream, or None
Note that if there are duplicates in right_stream, you won't get duplicate
rows for them.
If given, unused should be a one-arg function that will get called on all
unused items in right_stream.
"""
@ -578,12 +594,14 @@ def yield_translation_csv_messages(file, no_header=False):
def pot_for_column(cls, column, summary=False):
"""Translatable texts get categorized into different POT files to help
translators prioritize. The pots are:
- flavor: Flavor texts: here, strings from multiple versions are summarized
- ripped: Strings ripped from the games; translators for "official"
languages don't need to bother with these
- effects: Fanon descriptions of things; they usually use technical
language
- misc: Everything else; usually small texts
Set summary to true if this is a flavor summary column. Others are
determined by the column itself.
"""
@ -603,11 +621,14 @@ def number_replace(source, string):
def match_to_source(source, *translations):
"""Matches translated string(s) to source
The first translation whose source matches the source message, or whose CRC
matches, or which is official, and which is not fuzzy, is used.
If there's no such translation, the first translation is used.
Returns (source, source string CRC, string for CSV file, exact match?)
If there are no translations, returns (source, None, None, None)
Handles translations where numbers have been replaced by {num}, if they
have source information.
"""
@ -648,7 +669,9 @@ def match_to_source(source, *translations):
def merge_translations(source_stream, *translation_streams, **kwargs):
"""For each source message, get its best translation from translations.
Translations should be ordered by priority, highest to lowest.
Messages that don't appear in translations at all aren't included.
"""
source = tuple(source_stream)
@ -657,4 +680,4 @@ def merge_translations(source_stream, *translation_streams, **kwargs):
for t in translation_streams
]
for messages in zip(source, *streams):
yield match_to_source(*messages)
yield match_to_source(*messages)