Revert to six version-checking for CSV opening

2024-08-20 18:16:34 +00:00 · 2018-08-23 01:37:19 -04:00 · 2018-08-23 01:37:19 -04:00 · d70e95cb42
commit d70e95cb42
parent 8406878eee
2 changed files with 19 additions and 46 deletions
--- a/pokedex/db/load.py
+++ b/pokedex/db/load.py
@ -5,7 +5,6 @@ import csv
 import fnmatch
 import os.path
 import sys
 from io import open
 import six
 import sqlalchemy.sql.util
@ -17,6 +16,7 @@ from pokedex.defaults import get_default_csv_dir
 from pokedex.db.dependencies import find_dependent_tables
 from pokedex.db.oracle import rewrite_long_table_names
 def _get_table_names(metadata, patterns):
    """Returns a list of table names from the given metadata.  If `patterns`
    exists, only tables matching one of the patterns will be returned.
@ -40,7 +40,6 @@ def _get_verbose_prints(verbose):
    """If `verbose` is true, returns three functions: one for printing a
    starting message, one for printing an interim status update, and one for
    printing a success or failure message when finished.
    If `verbose` is false, returns no-op functions.
    """
@ -101,32 +100,23 @@ def _get_verbose_prints(verbose):
 def load(session, tables=[], directory=None, drop_tables=False, verbose=False, safe=True, recursive=True, langs=None):
    """Load data from CSV files into the given database session.
    Tables are created automatically.
    `session`
        SQLAlchemy session to use.
    `tables`
        List of tables to load.  If omitted, all tables are loaded.
    `directory`
        Directory the CSV files reside in.  Defaults to the `pokedex` data
        directory.
    `drop_tables`
        If set to True, existing `pokedex`-related tables will be dropped.
    `verbose`
        If set to True, status messages will be printed to stdout.
    `safe`
        If set to False, load can be faster, but can corrupt the database if
        it crashes or is interrupted.
    `recursive`
        If set to True, load all dependent tables too.
    `langs`
        List of identifiers of extra languages to load, or None to load them all
    """
@ -210,7 +200,10 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
        try:
            csvpath = "%s/%s.csv" % (directory, table_name)
-            csvfile = open(csvpath, 'r', encoding='utf-8')
+            if six.PY2:
                csvfile = open(csvpath, 'r')
            else:
                csvfile = open(csvpath, 'r', encoding="utf8")
        except IOError:
            # File doesn't exist; don't load anything!
            print_done('missing?')
@ -370,20 +363,15 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
 def dump(session, tables=[], directory=None, verbose=False, langs=None):
    """Dumps the contents of a database to a set of CSV files.  Probably not
    useful to anyone besides a developer.
    `session`
        SQLAlchemy session to use.
    `tables`
        List of tables to dump.  If omitted, all tables are dumped.
    `directory`
        Directory the CSV files should be put in.  Defaults to the `pokedex`
        data directory.
    `verbose`
        If set to True, status messages will be printed to stdout.
    `langs`
        List of identifiers of languages to dump unofficial texts for
    """
@ -416,7 +404,7 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
        # CSV module only works with bytes on 2 and only works with text on 3!
        if six.PY3:
-            writer = csv.writer(open(filename, 'w', newline='', encoding='utf-8'), lineterminator='\n')
+            writer = csv.writer(open(filename, 'w', newline='', encoding="utf8"), lineterminator='\n')
            columns = [col.name for col in table.columns]
        else:
            writer = csv.writer(open(filename, 'wb'), lineterminator='\n')
@ -467,3 +455,4 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
                writer.writerow(csvs)
        print_done()
--- a/pokedex/db/translations.py
+++ b/pokedex/db/translations.py
@ -1,23 +1,18 @@
 #! /usr/bin/env python
 u"""General handling of translations
 The general idea is to get messages from somewhere: the source pokedex CSVs,
 or the translation CSVs, etc., then merge them together in some way, and shove
 them into the database.
 If a message is translated, it has a source string attached to it, with the
 original English version. Or at least it has a CRC of the original.
 When that doesn't match, it means the English string changed and the
 translation has to be updated.
 Also this is why we can't dump translations from the database: there's no
 original string info.
 Some complications:
 Flavor text is so repetitive that we take strings from all the versions,
 separate the unique ones by blank lines, let translators work on that, and then
 put it in flavor_summary tables.
 Route names and other repetitive numeric things are replaced by e.g.
 "Route {num}" so translators only have to work on each set once.
 """
@ -25,10 +20,10 @@ from __future__ import print_function
 import binascii
 import csv
 import io
 import os
 import re
 from collections import defaultdict
 from io import open
 import six
 from six.moves import zip
@ -73,12 +68,10 @@ def crc(string):
 class Message(object):
    """Holds all info about a translatable or translated string
    cls: Name of the mapped class the message belongs to
    id: The id of the thing the message belongs to
    colname: name of the database column
    strings: A list of strings in the message, usually of length 1.
    Optional attributes (None if not set):
    colsize: Max length of the database column
    source: The string this was translated from
@ -220,9 +213,7 @@ class Translations(object):
    def write_translations(self, lang, *streams):
        """Write a translation CSV containing messages from streams.
        Streams should be ordered by priority, from highest to lowest.
        Any official translations (from the main database) are added automatically.
        """
        writer = self.writer_for_lang(lang)
@ -262,15 +253,18 @@ class Translations(object):
    def reader_for_class(self, cls, reader_class=csv.reader):
        tablename = cls.__table__.name
        csvpath = os.path.join(self.csv_directory, tablename + '.csv')
-        return reader_class(open(csvpath, 'r', encoding='utf-8'), lineterminator='\n')
+        if six.PY2:
            read = open(csvpath, 'r')
        else:
            read = open(csvpath, 'r', encoding='utf-8')
        return reader_class(read, lineterminator='\n')
    def writer_for_lang(self, lang):
        csvpath = os.path.join(self.translation_directory, '%s.csv' % lang)
-        return csv.writer(open(csvpath, 'w', encoding='utf-8', newline=''), lineterminator='\n')
+        return csv.writer(io.open(csvpath, 'w', newline='', encoding="utf8"), lineterminator='\n')
    def yield_source_messages(self, language_id=None):
        """Yield all messages from source CSV files
        Messages from all languages are returned. The messages are not ordered
        properly, but splitting the stream by language (and filtering results
        by merge_adjacent) will produce proper streams.
@ -307,7 +301,10 @@ class Translations(object):
        """
        path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang)
        try:
-            file = open(path, 'r', encoding='utf-8')
+            if six.PY2:
                file = open(path, 'r')
            else:
                file = open(path, 'r', encoding="utf8")
        except IOError:
            return ()
        return yield_translation_csv_messages(file)
@ -320,7 +317,6 @@ class Translations(object):
    def get_load_data(self, langs=None):
        """Yield (translation_class, data for INSERT) pairs for loading into the DB
        langs is either a list of language identifiers or None
        """
        if langs is None:
@ -366,7 +362,6 @@ class Translations(object):
 def group_by_object(stream):
    """Group stream by object
    Yields ((class name, object ID), (list of messages)) pairs.
    """
    stream = iter(stream)
@ -384,7 +379,6 @@ def group_by_object(stream):
 class Merge(object):
    """Merge several sorted iterators together
    Additional iterators may be added at any time with add_iterator.
    Accepts None for the initial iterators
    If the same value appears in more iterators, there will be duplicates in
@ -442,13 +436,10 @@ def merge_adjacent(gen):
 def leftjoin(left_stream, right_stream, key=lambda x: x, unused=None):
    """A "left join" operation on sorted iterators
    Yields (left, right) pairs, where left comes from left_stream and right
    is the corresponding item from right, or None
    Note that if there are duplicates in right_stream, you won't get duplicate
    rows for them.
    If given, unused should be a one-arg function that will get called on all
    unused items in right_stream.
    """
@ -587,14 +578,12 @@ def yield_translation_csv_messages(file, no_header=False):
 def pot_for_column(cls, column, summary=False):
    """Translatable texts get categorized into different POT files to help
       translators prioritize. The pots are:
    - flavor: Flavor texts: here, strings from multiple versions are summarized
    - ripped: Strings ripped from the games; translators for "official"
      languages don't need to bother with these
    - effects: Fanon descriptions of things; they usually use technical
      language
    - misc: Everything else; usually small texts
    Set summary to true if this is a flavor summary column. Others are
    determined by the column itself.
    """
@ -614,14 +603,11 @@ def number_replace(source, string):
 def match_to_source(source, *translations):
    """Matches translated string(s) to source
    The first translation whose source matches the source message, or whose CRC
    matches, or which is official, and which is not fuzzy, is used.
    If there's no such translation, the first translation is used.
    Returns (source, source string CRC, string for CSV file, exact match?)
    If there are no translations, returns (source, None, None, None)
    Handles translations where numbers have been replaced by {num}, if they
    have source information.
    """
@ -662,9 +648,7 @@ def match_to_source(source, *translations):
 def merge_translations(source_stream, *translation_streams, **kwargs):
    """For each source message, get its best translation from translations.
    Translations should be ordered by priority, highest to lowest.
    Messages that don't appear in translations at all aren't included.
    """
    source = tuple(source_stream)
@ -673,4 +657,4 @@ def merge_translations(source_stream, *translation_streams, **kwargs):
            for t in translation_streams
        ]
    for messages in zip(source, *streams):
-        yield match_to_source(*messages)
+        yield match_to_source(*messages)