diff --git a/pokedex/db/load.py b/pokedex/db/load.py index 35abed9..8968cd4 100644 --- a/pokedex/db/load.py +++ b/pokedex/db/load.py @@ -5,7 +5,6 @@ import csv import fnmatch import os.path import sys -from io import open import six import sqlalchemy.sql.util @@ -17,6 +16,7 @@ from pokedex.defaults import get_default_csv_dir from pokedex.db.dependencies import find_dependent_tables from pokedex.db.oracle import rewrite_long_table_names + def _get_table_names(metadata, patterns): """Returns a list of table names from the given metadata. If `patterns` exists, only tables matching one of the patterns will be returned. @@ -40,7 +40,6 @@ def _get_verbose_prints(verbose): """If `verbose` is true, returns three functions: one for printing a starting message, one for printing an interim status update, and one for printing a success or failure message when finished. - If `verbose` is false, returns no-op functions. """ @@ -101,32 +100,23 @@ def _get_verbose_prints(verbose): def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=True, langs=None): """Load data from CSV files into the given database session. - Tables are created automatically. - `session` SQLAlchemy session to use. - `tables` List of tables to load. If omitted, all tables are loaded. - `directory` Directory the CSV files reside in. Defaults to the `pokedex` data directory. - `drop_tables` If set to True, existing `pokedex`-related tables will be dropped. - `verbose` If set to True, status messages will be printed to stdout. - `safe` If set to False, load can be faster, but can corrupt the database if it crashes or is interrupted. - `recursive` If set to True, load all dependent tables too. - `langs` List of identifiers of extra language to load, or None to load them all """ @@ -210,7 +200,10 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s try: csvpath = "%s/%s.csv" % (directory, table_name) - csvfile = open(csvpath, 'r', encoding='utf-8') + if six.PY2: + csvfile = open(csvpath, 'r') + else: + csvfile = open(csvpath, 'r', encoding="utf8") except IOError: # File doesn't exist; don't load anything! print_done('missing?') @@ -370,20 +363,15 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s def dump(session, tables=[], directory=None, verbose=False, langs=None): """Dumps the contents of a database to a set of CSV files. Probably not useful to anyone besides a developer. - `session` SQLAlchemy session to use. - `tables` List of tables to dump. If omitted, all tables are dumped. - `directory` Directory the CSV files should be put in. Defaults to the `pokedex` data directory. - `verbose` If set to True, status messages will be printed to stdout. - `langs` List of identifiers of languages to dump unofficial texts for """ @@ -416,7 +404,7 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None): # CSV module only works with bytes on 2 and only works with text on 3! if six.PY3: - writer = csv.writer(open(filename, 'w', newline='', encoding='utf-8'), lineterminator='\n') + writer = csv.writer(open(filename, 'w', newline='', encoding="utf8"), lineterminator='\n') columns = [col.name for col in table.columns] else: writer = csv.writer(open(filename, 'wb'), lineterminator='\n') @@ -467,3 +455,4 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None): writer.writerow(csvs) print_done() + \ No newline at end of file diff --git a/pokedex/db/translations.py b/pokedex/db/translations.py index 472d417..0857aaf 100755 --- a/pokedex/db/translations.py +++ b/pokedex/db/translations.py @@ -1,23 +1,18 @@ #! /usr/bin/env python u"""General handling of translations - The general idea is to get messages from somewhere: the source pokedex CSVs, or the translation CSVs, etc., then merge them together in some way, and shove them into the database. - If a message is translated, it has a source string attached to it, with the original English version. Or at least it has a CRC of the original. When that doesn't match, it means the English string changed and the translation has to be updated. Also this is why we can't dump translations from the database: there's no original string info. - Some complications: - Flavor text is so repetitive that we take strings from all the version, separate the unique ones by blank lines, let translators work on that, and then put it in flavor_summary tables. - Routes names and other repetitive numeric things are replaced by e.g. "Route {num}" so translators only have to work on each set once. """ @@ -25,10 +20,10 @@ from __future__ import print_function import binascii import csv +import io import os import re from collections import defaultdict -from io import open import six from six.moves import zip @@ -73,12 +68,10 @@ def crc(string): class Message(object): """Holds all info about a translatable or translated string - cls: Name of the mapped class the message belongs to id: The id of the thing the message belongs to colname: name of the database column strings: A list of strings in the message, usualy of length 1. - Optional attributes (None if not set): colsize: Max length of the database column source: The string this was translated from @@ -220,9 +213,7 @@ class Translations(object): def write_translations(self, lang, *streams): """Write a translation CSV containing messages from streams. - Streams should be ordered by priority, from highest to lowest. - Any official translations (from the main database) are added automatically. """ writer = self.writer_for_lang(lang) @@ -262,15 +253,18 @@ class Translations(object): def reader_for_class(self, cls, reader_class=csv.reader): tablename = cls.__table__.name csvpath = os.path.join(self.csv_directory, tablename + '.csv') - return reader_class(open(csvpath, 'r', encoding='utf-8'), lineterminator='\n') + if six.PY2: + read = open(csvpath, 'r') + else: + read = open(csvpath, 'r', encoding='utf-8') + return reader_class(read, lineterminator='\n') def writer_for_lang(self, lang): csvpath = os.path.join(self.translation_directory, '%s.csv' % lang) - return csv.writer(open(csvpath, 'w', encoding='utf-8', newline=''), lineterminator='\n') + return csv.writer(io.open(csvpath, 'w', newline='', encoding="utf8"), lineterminator='\n') def yield_source_messages(self, language_id=None): """Yield all messages from source CSV files - Messages from all languages are returned. The messages are not ordered properly, but splitting the stream by language (and filtering results by merge_adjacent) will produce proper streams. @@ -307,7 +301,10 @@ class Translations(object): """ path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang) try: - file = open(path, 'r', encoding='utf-8') + if six.PY2: + file = open(path, 'r') + else: + file = open(path, 'r', encoding="utf8") except IOError: return () return yield_translation_csv_messages(file) @@ -320,7 +317,6 @@ class Translations(object): def get_load_data(self, langs=None): """Yield (translation_class, data for INSERT) pairs for loading into the DB - langs is either a list of language identifiers or None """ if langs is None: @@ -366,7 +362,6 @@ class Translations(object): def group_by_object(stream): """Group stream by object - Yields ((class name, object ID), (list of messages)) pairs. """ stream = iter(stream) @@ -384,7 +379,6 @@ def group_by_object(stream): class Merge(object): """Merge several sorted iterators together - Additional iterators may be added at any time with add_iterator. Accepts None for the initial iterators If the same value appears in more iterators, there will be duplicates in @@ -442,13 +436,10 @@ def merge_adjacent(gen): def leftjoin(left_stream, right_stream, key=lambda x: x, unused=None): """A "left join" operation on sorted iterators - Yields (left, right) pairs, where left comes from left_stream and right is the corresponding item from right, or None - Note that if there are duplicates in right_stream, you won't get duplicate rows for them. - If given, unused should be a one-arg function that will get called on all unused items in right_stream. """ @@ -587,14 +578,12 @@ def yield_translation_csv_messages(file, no_header=False): def pot_for_column(cls, column, summary=False): """Translatable texts get categorized into different POT files to help translators prioritize. The pots are: - - flavor: Flavor texts: here, strings from multiple versions are summarized - ripped: Strings ripped from the games; translators for "official" languages don't need to bother with these - effects: Fanon descriptions of things; they usually use technical language - misc: Everything else; usually small texts - Set source to true if this is a flavor summary column. Others are determined by the column itself. """ @@ -614,14 +603,11 @@ def number_replace(source, string): def match_to_source(source, *translations): """Matches translated string(s) to source - The first translation whose source matches the source message, or whose CRC matches, or which is official, and which is not fuzzy, it is used. If thre's no such translation, the first translation is used. - Returns (source, source string CRC, string for CSV file, exact match?) If there are no translations, returns (source, None, None, None) - Handles translations where numbers have been replaced by {num}, if they have source information. """ @@ -662,9 +648,7 @@ def match_to_source(source, *translations): def merge_translations(source_stream, *translation_streams, **kwargs): """For each source message, get its best translation from translations. - Translations should be ordered by priority, highest to lowest. - Messages that don't appear in translations at all aren't included. """ source = tuple(source_stream) @@ -673,4 +657,4 @@ def merge_translations(source_stream, *translation_streams, **kwargs): for t in translation_streams ] for messages in zip(source, *streams): - yield match_to_source(*messages) + yield match_to_source(*messages) \ No newline at end of file