1
0
Fork 0
mirror of https://github.com/veekun/pokedex.git synced 2024-08-20 18:16:34 +00:00

Revert to six version-checking for CSV opening

This commit is contained in:
qvalador 2018-08-23 01:37:19 -04:00
parent 8406878eee
commit d70e95cb42
2 changed files with 19 additions and 46 deletions

View file

@ -5,7 +5,6 @@ import csv
import fnmatch import fnmatch
import os.path import os.path
import sys import sys
from io import open
import six import six
import sqlalchemy.sql.util import sqlalchemy.sql.util
@ -17,6 +16,7 @@ from pokedex.defaults import get_default_csv_dir
from pokedex.db.dependencies import find_dependent_tables from pokedex.db.dependencies import find_dependent_tables
from pokedex.db.oracle import rewrite_long_table_names from pokedex.db.oracle import rewrite_long_table_names
def _get_table_names(metadata, patterns): def _get_table_names(metadata, patterns):
"""Returns a list of table names from the given metadata. If `patterns` """Returns a list of table names from the given metadata. If `patterns`
exists, only tables matching one of the patterns will be returned. exists, only tables matching one of the patterns will be returned.
@ -40,7 +40,6 @@ def _get_verbose_prints(verbose):
"""If `verbose` is true, returns three functions: one for printing a """If `verbose` is true, returns three functions: one for printing a
starting message, one for printing an interim status update, and one for starting message, one for printing an interim status update, and one for
printing a success or failure message when finished. printing a success or failure message when finished.
If `verbose` is false, returns no-op functions. If `verbose` is false, returns no-op functions.
""" """
@ -101,32 +100,23 @@ def _get_verbose_prints(verbose):
def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=True, langs=None): def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=True, langs=None):
"""Load data from CSV files into the given database session. """Load data from CSV files into the given database session.
Tables are created automatically. Tables are created automatically.
`session` `session`
SQLAlchemy session to use. SQLAlchemy session to use.
`tables` `tables`
List of tables to load. If omitted, all tables are loaded. List of tables to load. If omitted, all tables are loaded.
`directory` `directory`
Directory the CSV files reside in. Defaults to the `pokedex` data Directory the CSV files reside in. Defaults to the `pokedex` data
directory. directory.
`drop_tables` `drop_tables`
If set to True, existing `pokedex`-related tables will be dropped. If set to True, existing `pokedex`-related tables will be dropped.
`verbose` `verbose`
If set to True, status messages will be printed to stdout. If set to True, status messages will be printed to stdout.
`safe` `safe`
If set to False, load can be faster, but can corrupt the database if If set to False, load can be faster, but can corrupt the database if
it crashes or is interrupted. it crashes or is interrupted.
`recursive` `recursive`
If set to True, load all dependent tables too. If set to True, load all dependent tables too.
`langs` `langs`
List of identifiers of extra languages to load, or None to load them all List of identifiers of extra languages to load, or None to load them all
""" """
@ -210,7 +200,10 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
try: try:
csvpath = "%s/%s.csv" % (directory, table_name) csvpath = "%s/%s.csv" % (directory, table_name)
csvfile = open(csvpath, 'r', encoding='utf-8') if six.PY2:
csvfile = open(csvpath, 'r')
else:
csvfile = open(csvpath, 'r', encoding="utf8")
except IOError: except IOError:
# File doesn't exist; don't load anything! # File doesn't exist; don't load anything!
print_done('missing?') print_done('missing?')
@ -370,20 +363,15 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
def dump(session, tables=[], directory=None, verbose=False, langs=None): def dump(session, tables=[], directory=None, verbose=False, langs=None):
"""Dumps the contents of a database to a set of CSV files. Probably not """Dumps the contents of a database to a set of CSV files. Probably not
useful to anyone besides a developer. useful to anyone besides a developer.
`session` `session`
SQLAlchemy session to use. SQLAlchemy session to use.
`tables` `tables`
List of tables to dump. If omitted, all tables are dumped. List of tables to dump. If omitted, all tables are dumped.
`directory` `directory`
Directory the CSV files should be put in. Defaults to the `pokedex` Directory the CSV files should be put in. Defaults to the `pokedex`
data directory. data directory.
`verbose` `verbose`
If set to True, status messages will be printed to stdout. If set to True, status messages will be printed to stdout.
`langs` `langs`
List of identifiers of languages to dump unofficial texts for List of identifiers of languages to dump unofficial texts for
""" """
@ -416,7 +404,7 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
# CSV module only works with bytes on 2 and only works with text on 3! # CSV module only works with bytes on 2 and only works with text on 3!
if six.PY3: if six.PY3:
writer = csv.writer(open(filename, 'w', newline='', encoding='utf-8'), lineterminator='\n') writer = csv.writer(open(filename, 'w', newline='', encoding="utf8"), lineterminator='\n')
columns = [col.name for col in table.columns] columns = [col.name for col in table.columns]
else: else:
writer = csv.writer(open(filename, 'wb'), lineterminator='\n') writer = csv.writer(open(filename, 'wb'), lineterminator='\n')
@ -467,3 +455,4 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
writer.writerow(csvs) writer.writerow(csvs)
print_done() print_done()

View file

@ -1,23 +1,18 @@
#! /usr/bin/env python #! /usr/bin/env python
u"""General handling of translations u"""General handling of translations
The general idea is to get messages from somewhere: the source pokedex CSVs, The general idea is to get messages from somewhere: the source pokedex CSVs,
or the translation CSVs, etc., then merge them together in some way, and shove or the translation CSVs, etc., then merge them together in some way, and shove
them into the database. them into the database.
If a message is translated, it has a source string attached to it, with the If a message is translated, it has a source string attached to it, with the
original English version. Or at least it has a CRC of the original. original English version. Or at least it has a CRC of the original.
When that doesn't match, it means the English string changed and the When that doesn't match, it means the English string changed and the
translation has to be updated. translation has to be updated.
Also this is why we can't dump translations from the database: there's no Also this is why we can't dump translations from the database: there's no
original string info. original string info.
Some complications: Some complications:
Flavor text is so repetitive that we take strings from all the versions, Flavor text is so repetitive that we take strings from all the versions,
separate the unique ones by blank lines, let translators work on that, and then separate the unique ones by blank lines, let translators work on that, and then
put it in flavor_summary tables. put it in flavor_summary tables.
Route names and other repetitive numeric things are replaced by e.g. Route names and other repetitive numeric things are replaced by e.g.
"Route {num}" so translators only have to work on each set once. "Route {num}" so translators only have to work on each set once.
""" """
@ -25,10 +20,10 @@ from __future__ import print_function
import binascii import binascii
import csv import csv
import io
import os import os
import re import re
from collections import defaultdict from collections import defaultdict
from io import open
import six import six
from six.moves import zip from six.moves import zip
@ -73,12 +68,10 @@ def crc(string):
class Message(object): class Message(object):
"""Holds all info about a translatable or translated string """Holds all info about a translatable or translated string
cls: Name of the mapped class the message belongs to cls: Name of the mapped class the message belongs to
id: The id of the thing the message belongs to id: The id of the thing the message belongs to
colname: name of the database column colname: name of the database column
strings: A list of strings in the message, usually of length 1. strings: A list of strings in the message, usually of length 1.
Optional attributes (None if not set): Optional attributes (None if not set):
colsize: Max length of the database column colsize: Max length of the database column
source: The string this was translated from source: The string this was translated from
@ -220,9 +213,7 @@ class Translations(object):
def write_translations(self, lang, *streams): def write_translations(self, lang, *streams):
"""Write a translation CSV containing messages from streams. """Write a translation CSV containing messages from streams.
Streams should be ordered by priority, from highest to lowest. Streams should be ordered by priority, from highest to lowest.
Any official translations (from the main database) are added automatically. Any official translations (from the main database) are added automatically.
""" """
writer = self.writer_for_lang(lang) writer = self.writer_for_lang(lang)
@ -262,15 +253,18 @@ class Translations(object):
def reader_for_class(self, cls, reader_class=csv.reader): def reader_for_class(self, cls, reader_class=csv.reader):
tablename = cls.__table__.name tablename = cls.__table__.name
csvpath = os.path.join(self.csv_directory, tablename + '.csv') csvpath = os.path.join(self.csv_directory, tablename + '.csv')
return reader_class(open(csvpath, 'r', encoding='utf-8'), lineterminator='\n') if six.PY2:
read = open(csvpath, 'r')
else:
read = open(csvpath, 'r', encoding='utf-8')
return reader_class(read, lineterminator='\n')
def writer_for_lang(self, lang): def writer_for_lang(self, lang):
csvpath = os.path.join(self.translation_directory, '%s.csv' % lang) csvpath = os.path.join(self.translation_directory, '%s.csv' % lang)
return csv.writer(open(csvpath, 'w', encoding='utf-8', newline=''), lineterminator='\n') return csv.writer(io.open(csvpath, 'w', newline='', encoding="utf8"), lineterminator='\n')
def yield_source_messages(self, language_id=None): def yield_source_messages(self, language_id=None):
"""Yield all messages from source CSV files """Yield all messages from source CSV files
Messages from all languages are returned. The messages are not ordered Messages from all languages are returned. The messages are not ordered
properly, but splitting the stream by language (and filtering results properly, but splitting the stream by language (and filtering results
by merge_adjacent) will produce proper streams. by merge_adjacent) will produce proper streams.
@ -307,7 +301,10 @@ class Translations(object):
""" """
path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang) path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang)
try: try:
file = open(path, 'r', encoding='utf-8') if six.PY2:
file = open(path, 'r')
else:
file = open(path, 'r', encoding="utf8")
except IOError: except IOError:
return () return ()
return yield_translation_csv_messages(file) return yield_translation_csv_messages(file)
@ -320,7 +317,6 @@ class Translations(object):
def get_load_data(self, langs=None): def get_load_data(self, langs=None):
"""Yield (translation_class, data for INSERT) pairs for loading into the DB """Yield (translation_class, data for INSERT) pairs for loading into the DB
langs is either a list of language identifiers or None langs is either a list of language identifiers or None
""" """
if langs is None: if langs is None:
@ -366,7 +362,6 @@ class Translations(object):
def group_by_object(stream): def group_by_object(stream):
"""Group stream by object """Group stream by object
Yields ((class name, object ID), (list of messages)) pairs. Yields ((class name, object ID), (list of messages)) pairs.
""" """
stream = iter(stream) stream = iter(stream)
@ -384,7 +379,6 @@ def group_by_object(stream):
class Merge(object): class Merge(object):
"""Merge several sorted iterators together """Merge several sorted iterators together
Additional iterators may be added at any time with add_iterator. Additional iterators may be added at any time with add_iterator.
Accepts None for the initial iterators Accepts None for the initial iterators
If the same value appears in more iterators, there will be duplicates in If the same value appears in more iterators, there will be duplicates in
@ -442,13 +436,10 @@ def merge_adjacent(gen):
def leftjoin(left_stream, right_stream, key=lambda x: x, unused=None): def leftjoin(left_stream, right_stream, key=lambda x: x, unused=None):
"""A "left join" operation on sorted iterators """A "left join" operation on sorted iterators
Yields (left, right) pairs, where left comes from left_stream and right Yields (left, right) pairs, where left comes from left_stream and right
is the corresponding item from right_stream, or None is the corresponding item from right_stream, or None
Note that if there are duplicates in right_stream, you won't get duplicate Note that if there are duplicates in right_stream, you won't get duplicate
rows for them. rows for them.
If given, unused should be a one-arg function that will get called on all If given, unused should be a one-arg function that will get called on all
unused items in right_stream. unused items in right_stream.
""" """
@ -587,14 +578,12 @@ def yield_translation_csv_messages(file, no_header=False):
def pot_for_column(cls, column, summary=False): def pot_for_column(cls, column, summary=False):
"""Translatable texts get categorized into different POT files to help """Translatable texts get categorized into different POT files to help
translators prioritize. The pots are: translators prioritize. The pots are:
- flavor: Flavor texts: here, strings from multiple versions are summarized - flavor: Flavor texts: here, strings from multiple versions are summarized
- ripped: Strings ripped from the games; translators for "official" - ripped: Strings ripped from the games; translators for "official"
languages don't need to bother with these languages don't need to bother with these
- effects: Fanon descriptions of things; they usually use technical - effects: Fanon descriptions of things; they usually use technical
language language
- misc: Everything else; usually small texts - misc: Everything else; usually small texts
Set summary to true if this is a flavor summary column. Others are Set summary to true if this is a flavor summary column. Others are
determined by the column itself. determined by the column itself.
""" """
@ -614,14 +603,11 @@ def number_replace(source, string):
def match_to_source(source, *translations): def match_to_source(source, *translations):
"""Matches translated string(s) to source """Matches translated string(s) to source
The first translation whose source matches the source message, or whose CRC The first translation whose source matches the source message, or whose CRC
matches, or which is official, and which is not fuzzy, is used. matches, or which is official, and which is not fuzzy, is used.
If there's no such translation, the first translation is used. If there's no such translation, the first translation is used.
Returns (source, source string CRC, string for CSV file, exact match?) Returns (source, source string CRC, string for CSV file, exact match?)
If there are no translations, returns (source, None, None, None) If there are no translations, returns (source, None, None, None)
Handles translations where numbers have been replaced by {num}, if they Handles translations where numbers have been replaced by {num}, if they
have source information. have source information.
""" """
@ -662,9 +648,7 @@ def match_to_source(source, *translations):
def merge_translations(source_stream, *translation_streams, **kwargs): def merge_translations(source_stream, *translation_streams, **kwargs):
"""For each source message, get its best translation from translations. """For each source message, get its best translation from translations.
Translations should be ordered by priority, highest to lowest. Translations should be ordered by priority, highest to lowest.
Messages that don't appear in translations at all aren't included. Messages that don't appear in translations at all aren't included.
""" """
source = tuple(source_stream) source = tuple(source_stream)
@ -673,4 +657,4 @@ def merge_translations(source_stream, *translation_streams, **kwargs):
for t in translation_streams for t in translation_streams
] ]
for messages in zip(source, *streams): for messages in zip(source, *streams):
yield match_to_source(*messages) yield match_to_source(*messages)