
Revert to six version-checking for CSV opening

qvalador 2018-08-23 01:37:19 -04:00
parent 8406878eee
commit d70e95cb42
2 changed files with 19 additions and 46 deletions


@@ -5,7 +5,6 @@ import csv
 import fnmatch
 import os.path
 import sys
-from io import open
 import six
 import sqlalchemy.sql.util
@@ -17,6 +16,7 @@ from pokedex.defaults import get_default_csv_dir
from pokedex.db.dependencies import find_dependent_tables
from pokedex.db.oracle import rewrite_long_table_names
def _get_table_names(metadata, patterns):
"""Returns a list of table names from the given metadata. If `patterns`
exists, only tables matching one of the patterns will be returned.
@@ -40,7 +40,6 @@ def _get_verbose_prints(verbose):
"""If `verbose` is true, returns three functions: one for printing a
starting message, one for printing an interim status update, and one for
printing a success or failure message when finished.
If `verbose` is false, returns no-op functions.
"""
@@ -101,32 +100,23 @@ def _get_verbose_prints(verbose):
def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=True, langs=None):
"""Load data from CSV files into the given database session.
Tables are created automatically.
`session`
SQLAlchemy session to use.
`tables`
List of tables to load. If omitted, all tables are loaded.
`directory`
Directory the CSV files reside in. Defaults to the `pokedex` data
directory.
`drop_tables`
If set to True, existing `pokedex`-related tables will be dropped.
`verbose`
If set to True, status messages will be printed to stdout.
`safe`
If set to False, load can be faster, but can corrupt the database if
it crashes or is interrupted.
`recursive`
If set to True, load all dependent tables too.
`langs`
List of identifiers of extra languages to load, or None to load them all
"""
@@ -210,7 +200,10 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
         try:
             csvpath = "%s/%s.csv" % (directory, table_name)
-            csvfile = open(csvpath, 'r', encoding='utf-8')
+            if six.PY2:
+                csvfile = open(csvpath, 'r')
+            else:
+                csvfile = open(csvpath, 'r', encoding="utf8")
         except IOError:
             # File doesn't exist; don't load anything!
             print_done('missing?')
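
The branch being restored above exists because Python 2's csv module reads byte strings while Python 3's reads text. A small illustrative helper, not part of the commit, capturing the same check:

    # Illustration only -- the same version check, factored into a helper.
    import six

    def open_csv_for_reading(path):
        # Python 2's csv reader expects bytes, so no decoding here;
        # Python 3's expects unicode text, so decode as UTF-8 on open.
        if six.PY2:
            return open(path, 'r')
        return open(path, 'r', encoding='utf8')
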
@@ -370,20 +363,15 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
def dump(session, tables=[], directory=None, verbose=False, langs=None):
"""Dumps the contents of a database to a set of CSV files. Probably not
useful to anyone besides a developer.
`session`
SQLAlchemy session to use.
`tables`
List of tables to dump. If omitted, all tables are dumped.
`directory`
Directory the CSV files should be put in. Defaults to the `pokedex`
data directory.
`verbose`
If set to True, status messages will be printed to stdout.
`langs`
List of identifiers of languages to dump unofficial texts for
"""
@@ -416,7 +404,7 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
         # CSV module only works with bytes on 2 and only works with text on 3!
         if six.PY3:
-            writer = csv.writer(open(filename, 'w', newline='', encoding='utf-8'), lineterminator='\n')
+            writer = csv.writer(open(filename, 'w', newline='', encoding="utf8"), lineterminator='\n')
             columns = [col.name for col in table.columns]
         else:
             writer = csv.writer(open(filename, 'wb'), lineterminator='\n')
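
The comment in this hunk states the constraint driving both branches: the csv module wants bytes on Python 2 and text on Python 3. A writing-side sketch of the same idea, illustration only:

    # Illustration only -- version-dependent CSV writer setup.
    import csv
    import six

    def csv_writer_for(path):
        if six.PY3:
            # Text mode; let the csv module manage newlines, encode as UTF-8.
            handle = open(path, 'w', newline='', encoding='utf8')
        else:
            # Binary mode; rows must be written as UTF-8 encoded byte strings.
            handle = open(path, 'wb')
        return csv.writer(handle, lineterminator='\n')
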
@@ -467,3 +455,4 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
writer.writerow(csvs)
print_done()


@@ -1,23 +1,18 @@
#! /usr/bin/env python
u"""General handling of translations
The general idea is to get messages from somewhere: the source pokedex CSVs,
or the translation CSVs, etc., then merge them together in some way, and shove
them into the database.
If a message is translated, it has a source string attached to it, with the
original English version. Or at least it has a CRC of the original.
When that doesn't match, it means the English string changed and the
translation has to be updated.
Also this is why we can't dump translations from the database: there's no
original string info.
Some complications:
Flavor text is so repetitive that we take strings from all the versions,
separate the unique ones by blank lines, let translators work on that, and then
put it in flavor_summary tables.
Route names and other repetitive numeric things are replaced by e.g.
"Route {num}" so translators only have to work on each set once.
"""
@@ -25,10 +20,10 @@ from __future__ import print_function
 import binascii
 import csv
+import io
 import os
 import re
 from collections import defaultdict
-from io import open
 import six
 from six.moves import zip
@@ -73,12 +68,10 @@ def crc(string):
class Message(object):
"""Holds all info about a translatable or translated string
cls: Name of the mapped class the message belongs to
id: The id of the thing the message belongs to
colname: name of the database column
strings: A list of strings in the message, usually of length 1.
Optional attributes (None if not set):
colsize: Max length of the database column
source: The string this was translated from
@@ -220,9 +213,7 @@ class Translations(object):
def write_translations(self, lang, *streams):
"""Write a translation CSV containing messages from streams.
Streams should be ordered by priority, from highest to lowest.
Any official translations (from the main database) are added automatically.
"""
writer = self.writer_for_lang(lang)
@@ -262,15 +253,18 @@ class Translations(object):
     def reader_for_class(self, cls, reader_class=csv.reader):
         tablename = cls.__table__.name
         csvpath = os.path.join(self.csv_directory, tablename + '.csv')
-        return reader_class(open(csvpath, 'r', encoding='utf-8'), lineterminator='\n')
+        if six.PY2:
+            read = open(csvpath, 'r')
+        else:
+            read = open(csvpath, 'r', encoding='utf-8')
+        return reader_class(read, lineterminator='\n')

     def writer_for_lang(self, lang):
         csvpath = os.path.join(self.translation_directory, '%s.csv' % lang)
-        return csv.writer(open(csvpath, 'w', encoding='utf-8', newline=''), lineterminator='\n')
+        return csv.writer(io.open(csvpath, 'w', newline='', encoding="utf8"), lineterminator='\n')
def yield_source_messages(self, language_id=None):
"""Yield all messages from source CSV files
Messages from all languages are returned. The messages are not ordered
properly, but splitting the stream by language (and filtering results
by merge_adjacent) will produce proper streams.
@@ -307,7 +301,10 @@ class Translations(object):
         """
         path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang)
         try:
-            file = open(path, 'r', encoding='utf-8')
+            if six.PY2:
+                file = open(path, 'r')
+            else:
+                file = open(path, 'r', encoding="utf8")
         except IOError:
             return ()
         return yield_translation_csv_messages(file)
@@ -320,7 +317,6 @@ class Translations(object):
def get_load_data(self, langs=None):
"""Yield (translation_class, data for INSERT) pairs for loading into the DB
langs is either a list of language identifiers or None
"""
if langs is None:
@@ -366,7 +362,6 @@ class Translations(object):
def group_by_object(stream):
"""Group stream by object
Yields ((class name, object ID), (list of messages)) pairs.
"""
stream = iter(stream)
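
A rough standard-library equivalent of this grouping, assuming the stream is already sorted by object as the surrounding code requires:

    # Illustration only -- group a sorted message stream by (class name, object id).
    from itertools import groupby

    def group_by_object_sketch(stream):
        for key, messages in groupby(stream, key=lambda m: (m.cls, m.id)):
            yield key, list(messages)
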
@@ -384,7 +379,6 @@ def group_by_object(stream):
class Merge(object):
"""Merge several sorted iterators together
Additional iterators may be added at any time with add_iterator.
Accepts None for the initial iterators
If the same value appears in more iterators, there will be duplicates in
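
When no iterators need to be added mid-stream, the standard library's k-way merge behaves the same way, duplicates included; the add_iterator capability is what this class adds on top.

    # Simplified stand-in for the common case.
    import heapq

    print(list(heapq.merge([1, 4, 9], [2, 4, 8], [3, 5])))
    # -> [1, 2, 3, 4, 4, 5, 8, 9]   (duplicates across iterators are kept)
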
@@ -442,13 +436,10 @@ def merge_adjacent(gen):
def leftjoin(left_stream, right_stream, key=lambda x: x, unused=None):
"""A "left join" operation on sorted iterators
Yields (left, right) pairs, where left comes from left_stream and right
is the corresponding item from right, or None
Note that if there are duplicates in right_stream, you won't get duplicate
rows for them.
If given, unused should be a one-arg function that will get called on all
unused items in right_stream.
"""
@@ -587,14 +578,12 @@ def yield_translation_csv_messages(file, no_header=False):
def pot_for_column(cls, column, summary=False):
"""Translatable texts get categorized into different POT files to help
translators prioritize. The pots are:
- flavor: Flavor texts: here, strings from multiple versions are summarized
- ripped: Strings ripped from the games; translators for "official"
languages don't need to bother with these
- effects: Fanon descriptions of things; they usually use technical
language
- misc: Everything else; usually small texts
Set summary to true if this is a flavor summary column. Others are
determined by the column itself.
"""
@@ -614,14 +603,11 @@ def number_replace(source, string):
def match_to_source(source, *translations):
"""Matches translated string(s) to source
The first translation whose source matches the source message, or whose CRC
matches, or which is official, and which is not fuzzy, is used.
If there's no such translation, the first translation is used.
Returns (source, source string CRC, string for CSV file, exact match?)
If there are no translations, returns (source, None, None, None)
Handles translations where numbers have been replaced by {num}, if they
have source information.
"""
@@ -662,9 +648,7 @@ def match_to_source(source, *translations):
def merge_translations(source_stream, *translation_streams, **kwargs):
"""For each source message, get its best translation from translations.
Translations should be ordered by priority, highest to lowest.
Messages that don't appear in translations at all aren't included.
"""
source = tuple(source_stream)
@@ -673,4 +657,4 @@ def merge_translations(source_stream, *translation_streams, **kwargs):
for t in translation_streams
]
for messages in zip(source, *streams):
yield match_to_source(*messages)
yield match_to_source(*messages)