mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Revert to six version-checking for CSV opening
This commit is contained in:
parent
8406878eee
commit
d70e95cb42
2 changed files with 19 additions and 46 deletions
pokedex/db
|
@ -5,7 +5,6 @@ import csv
|
|||
import fnmatch
|
||||
import os.path
|
||||
import sys
|
||||
from io import open
|
||||
|
||||
import six
|
||||
import sqlalchemy.sql.util
|
||||
|
@ -17,6 +16,7 @@ from pokedex.defaults import get_default_csv_dir
|
|||
from pokedex.db.dependencies import find_dependent_tables
|
||||
from pokedex.db.oracle import rewrite_long_table_names
|
||||
|
||||
|
||||
def _get_table_names(metadata, patterns):
|
||||
"""Returns a list of table names from the given metadata. If `patterns`
|
||||
exists, only tables matching one of the patterns will be returned.
|
||||
|
@ -40,7 +40,6 @@ def _get_verbose_prints(verbose):
|
|||
"""If `verbose` is true, returns three functions: one for printing a
|
||||
starting message, one for printing an interim status update, and one for
|
||||
printing a success or failure message when finished.
|
||||
|
||||
If `verbose` is false, returns no-op functions.
|
||||
"""
|
||||
|
||||
|
@ -101,32 +100,23 @@ def _get_verbose_prints(verbose):
|
|||
|
||||
def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=True, langs=None):
|
||||
"""Load data from CSV files into the given database session.
|
||||
|
||||
Tables are created automatically.
|
||||
|
||||
`session`
|
||||
SQLAlchemy session to use.
|
||||
|
||||
`tables`
|
||||
List of tables to load. If omitted, all tables are loaded.
|
||||
|
||||
`directory`
|
||||
Directory the CSV files reside in. Defaults to the `pokedex` data
|
||||
directory.
|
||||
|
||||
`drop_tables`
|
||||
If set to True, existing `pokedex`-related tables will be dropped.
|
||||
|
||||
`verbose`
|
||||
If set to True, status messages will be printed to stdout.
|
||||
|
||||
`safe`
|
||||
If set to False, load can be faster, but can corrupt the database if
|
||||
it crashes or is interrupted.
|
||||
|
||||
`recursive`
|
||||
If set to True, load all dependent tables too.
|
||||
|
||||
`langs`
|
||||
List of identifiers of extra languages to load, or None to load them all
|
||||
"""
|
||||
|
@ -210,7 +200,10 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
|
|||
|
||||
try:
|
||||
csvpath = "%s/%s.csv" % (directory, table_name)
|
||||
csvfile = open(csvpath, 'r', encoding='utf-8')
|
||||
if six.PY2:
|
||||
csvfile = open(csvpath, 'r')
|
||||
else:
|
||||
csvfile = open(csvpath, 'r', encoding="utf8")
|
||||
except IOError:
|
||||
# File doesn't exist; don't load anything!
|
||||
print_done('missing?')
|
||||
|
@ -370,20 +363,15 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
|
|||
def dump(session, tables=[], directory=None, verbose=False, langs=None):
|
||||
"""Dumps the contents of a database to a set of CSV files. Probably not
|
||||
useful to anyone besides a developer.
|
||||
|
||||
`session`
|
||||
SQLAlchemy session to use.
|
||||
|
||||
`tables`
|
||||
List of tables to dump. If omitted, all tables are dumped.
|
||||
|
||||
`directory`
|
||||
Directory the CSV files should be put in. Defaults to the `pokedex`
|
||||
data directory.
|
||||
|
||||
`verbose`
|
||||
If set to True, status messages will be printed to stdout.
|
||||
|
||||
`langs`
|
||||
List of identifiers of languages to dump unofficial texts for
|
||||
"""
|
||||
|
@ -416,7 +404,7 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
|
|||
|
||||
# CSV module only works with bytes on 2 and only works with text on 3!
|
||||
if six.PY3:
|
||||
writer = csv.writer(open(filename, 'w', newline='', encoding='utf-8'), lineterminator='\n')
|
||||
writer = csv.writer(open(filename, 'w', newline='', encoding="utf8"), lineterminator='\n')
|
||||
columns = [col.name for col in table.columns]
|
||||
else:
|
||||
writer = csv.writer(open(filename, 'wb'), lineterminator='\n')
|
||||
|
@ -467,3 +455,4 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
|
|||
writer.writerow(csvs)
|
||||
|
||||
print_done()
|
||||
|
|
@ -1,23 +1,18 @@
|
|||
#! /usr/bin/env python
|
||||
u"""General handling of translations
|
||||
|
||||
The general idea is to get messages from somewhere: the source pokedex CSVs,
|
||||
or the translation CSVs, etc., then merge them together in some way, and shove
|
||||
them into the database.
|
||||
|
||||
If a message is translated, it has a source string attached to it, with the
|
||||
original English version. Or at least it has a CRC of the original.
|
||||
When that doesn't match, it means the English string changed and the
|
||||
translation has to be updated.
|
||||
Also this is why we can't dump translations from the database: there's no
|
||||
original string info.
|
||||
|
||||
Some complications:
|
||||
|
||||
Flavor text is so repetitive that we take strings from all the versions,
|
||||
separate the unique ones by blank lines, let translators work on that, and then
|
||||
put it in flavor_summary tables.
|
||||
|
||||
Route names and other repetitive numeric things are replaced by e.g.
|
||||
"Route {num}" so translators only have to work on each set once.
|
||||
"""
|
||||
|
@ -25,10 +20,10 @@ from __future__ import print_function
|
|||
|
||||
import binascii
|
||||
import csv
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from io import open
|
||||
|
||||
import six
|
||||
from six.moves import zip
|
||||
|
@ -73,12 +68,10 @@ def crc(string):
|
|||
|
||||
class Message(object):
|
||||
"""Holds all info about a translatable or translated string
|
||||
|
||||
cls: Name of the mapped class the message belongs to
|
||||
id: The id of the thing the message belongs to
|
||||
colname: name of the database column
|
||||
strings: A list of strings in the message, usually of length 1.
|
||||
|
||||
Optional attributes (None if not set):
|
||||
colsize: Max length of the database column
|
||||
source: The string this was translated from
|
||||
|
@ -220,9 +213,7 @@ class Translations(object):
|
|||
|
||||
def write_translations(self, lang, *streams):
|
||||
"""Write a translation CSV containing messages from streams.
|
||||
|
||||
Streams should be ordered by priority, from highest to lowest.
|
||||
|
||||
Any official translations (from the main database) are added automatically.
|
||||
"""
|
||||
writer = self.writer_for_lang(lang)
|
||||
|
@ -262,15 +253,18 @@ class Translations(object):
|
|||
def reader_for_class(self, cls, reader_class=csv.reader):
|
||||
tablename = cls.__table__.name
|
||||
csvpath = os.path.join(self.csv_directory, tablename + '.csv')
|
||||
return reader_class(open(csvpath, 'r', encoding='utf-8'), lineterminator='\n')
|
||||
if six.PY2:
|
||||
read = open(csvpath, 'r')
|
||||
else:
|
||||
read = open(csvpath, 'r', encoding='utf-8')
|
||||
return reader_class(read, lineterminator='\n')
|
||||
|
||||
def writer_for_lang(self, lang):
|
||||
csvpath = os.path.join(self.translation_directory, '%s.csv' % lang)
|
||||
return csv.writer(open(csvpath, 'w', encoding='utf-8', newline=''), lineterminator='\n')
|
||||
return csv.writer(io.open(csvpath, 'w', newline='', encoding="utf8"), lineterminator='\n')
|
||||
|
||||
def yield_source_messages(self, language_id=None):
|
||||
"""Yield all messages from source CSV files
|
||||
|
||||
Messages from all languages are returned. The messages are not ordered
|
||||
properly, but splitting the stream by language (and filtering results
|
||||
by merge_adjacent) will produce proper streams.
|
||||
|
@ -307,7 +301,10 @@ class Translations(object):
|
|||
"""
|
||||
path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang)
|
||||
try:
|
||||
file = open(path, 'r', encoding='utf-8')
|
||||
if six.PY2:
|
||||
file = open(path, 'r')
|
||||
else:
|
||||
file = open(path, 'r', encoding="utf8")
|
||||
except IOError:
|
||||
return ()
|
||||
return yield_translation_csv_messages(file)
|
||||
|
@ -320,7 +317,6 @@ class Translations(object):
|
|||
|
||||
def get_load_data(self, langs=None):
|
||||
"""Yield (translation_class, data for INSERT) pairs for loading into the DB
|
||||
|
||||
langs is either a list of language identifiers or None
|
||||
"""
|
||||
if langs is None:
|
||||
|
@ -366,7 +362,6 @@ class Translations(object):
|
|||
|
||||
def group_by_object(stream):
|
||||
"""Group stream by object
|
||||
|
||||
Yields ((class name, object ID), (list of messages)) pairs.
|
||||
"""
|
||||
stream = iter(stream)
|
||||
|
@ -384,7 +379,6 @@ def group_by_object(stream):
|
|||
|
||||
class Merge(object):
|
||||
"""Merge several sorted iterators together
|
||||
|
||||
Additional iterators may be added at any time with add_iterator.
|
||||
Accepts None for the initial iterators
|
||||
If the same value appears in more iterators, there will be duplicates in
|
||||
|
@ -442,13 +436,10 @@ def merge_adjacent(gen):
|
|||
|
||||
def leftjoin(left_stream, right_stream, key=lambda x: x, unused=None):
|
||||
"""A "left join" operation on sorted iterators
|
||||
|
||||
Yields (left, right) pairs, where left comes from left_stream and right
|
||||
is the corresponding item from right, or None
|
||||
|
||||
Note that if there are duplicates in right_stream, you won't get duplicate
|
||||
rows for them.
|
||||
|
||||
If given, unused should be a one-arg function that will get called on all
|
||||
unused items in right_stream.
|
||||
"""
|
||||
|
@ -587,14 +578,12 @@ def yield_translation_csv_messages(file, no_header=False):
|
|||
def pot_for_column(cls, column, summary=False):
|
||||
"""Translatable texts get categorized into different POT files to help
|
||||
translators prioritize. The pots are:
|
||||
|
||||
- flavor: Flavor texts: here, strings from multiple versions are summarized
|
||||
- ripped: Strings ripped from the games; translators for "official"
|
||||
languages don't need to bother with these
|
||||
- effects: Fanon descriptions of things; they usually use technical
|
||||
language
|
||||
- misc: Everything else; usually small texts
|
||||
|
||||
Set summary to true if this is a flavor summary column. Others are
|
||||
determined by the column itself.
|
||||
"""
|
||||
|
@ -614,14 +603,11 @@ def number_replace(source, string):
|
|||
|
||||
def match_to_source(source, *translations):
|
||||
"""Matches translated string(s) to source
|
||||
|
||||
The first translation whose source matches the source message, or whose CRC
|
||||
matches, or which is official, and which is not fuzzy, is used.
|
||||
If there's no such translation, the first translation is used.
|
||||
|
||||
Returns (source, source string CRC, string for CSV file, exact match?)
|
||||
If there are no translations, returns (source, None, None, None)
|
||||
|
||||
Handles translations where numbers have been replaced by {num}, if they
|
||||
have source information.
|
||||
"""
|
||||
|
@ -662,9 +648,7 @@ def match_to_source(source, *translations):
|
|||
|
||||
def merge_translations(source_stream, *translation_streams, **kwargs):
|
||||
"""For each source message, get its best translation from translations.
|
||||
|
||||
Translations should be ordered by priority, highest to lowest.
|
||||
|
||||
Messages that don't appear in translations at all aren't included.
|
||||
"""
|
||||
source = tuple(source_stream)
|
||||
|
@ -673,4 +657,4 @@ def merge_translations(source_stream, *translation_streams, **kwargs):
|
|||
for t in translation_streams
|
||||
]
|
||||
for messages in zip(source, *streams):
|
||||
yield match_to_source(*messages)
|
||||
yield match_to_source(*messages)
|
Loading…
Add table
Reference in a new issue